Compare commits
3 Commits
d66efdecf5
...
coolify
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b697963186 | ||
|
|
ef6237ffdf | ||
|
|
41a8f3b183 |
@@ -6,31 +6,22 @@
|
||||
|
||||
| Geraet | Rolle | Aufgaben |
|
||||
|--------|-------|----------|
|
||||
| **MacBook** | Entwicklung | Claude Terminal, Code-Entwicklung, Browser (Frontend-Tests) |
|
||||
| **Mac Mini** | Server | Docker, alle Services, Tests, Builds, Deployment |
|
||||
| **MacBook** | Client | Claude Terminal, Browser (Frontend-Tests) |
|
||||
| **Mac Mini** | Server | Docker, alle Services, Code-Ausfuehrung, Tests, Git |
|
||||
|
||||
**WICHTIG:** Code wird direkt auf dem MacBook in diesem Repo bearbeitet. Docker und Services laufen auf dem Mac Mini.
|
||||
**WICHTIG:** Die Entwicklung findet vollstaendig auf dem **Mac Mini** statt!
|
||||
|
||||
### Entwicklungsworkflow
|
||||
### SSH-Verbindung
|
||||
|
||||
```bash
|
||||
# 1. Code auf MacBook bearbeiten (dieses Verzeichnis)
|
||||
# 2. Committen und pushen:
|
||||
git push origin main && git push gitea main
|
||||
ssh macmini
|
||||
# Projektverzeichnis:
|
||||
cd /Users/benjaminadmin/Projekte/breakpilot-lehrer
|
||||
|
||||
# 3. Auf Mac Mini pullen und Container neu bauen:
|
||||
ssh macmini "git -C /Users/benjaminadmin/Projekte/breakpilot-lehrer pull --no-rebase origin main"
|
||||
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml build --no-cache <service>"
|
||||
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml up -d <service>"
|
||||
# Einzelbefehle (BEVORZUGT):
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && <cmd>"
|
||||
```
|
||||
|
||||
### SSH-Verbindung (fuer Docker/Tests)
|
||||
|
||||
**WICHTIG:** `cd` in SSH-Kommandos funktioniert NICHT zuverlaessig! Stattdessen:
|
||||
- Git: `git -C /Users/benjaminadmin/Projekte/breakpilot-lehrer <cmd>`
|
||||
- Docker: `/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml <cmd>`
|
||||
- Logs: `/usr/local/bin/docker logs -f bp-lehrer-<service>`
|
||||
|
||||
---
|
||||
|
||||
## Voraussetzung
|
||||
@@ -172,10 +163,10 @@ breakpilot-lehrer/
|
||||
|
||||
```bash
|
||||
# Lehrer-Services starten (Core muss laufen!)
|
||||
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml up -d"
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && /usr/local/bin/docker compose up -d"
|
||||
|
||||
# Einzelnen Service neu bauen
|
||||
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml build --no-cache <service>"
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && /usr/local/bin/docker compose build --no-cache <service>"
|
||||
|
||||
# Logs
|
||||
ssh macmini "/usr/local/bin/docker logs -f bp-lehrer-<service>"
|
||||
@@ -185,7 +176,6 @@ ssh macmini "/usr/local/bin/docker ps --filter name=bp-lehrer"
|
||||
```
|
||||
|
||||
**WICHTIG:** Docker-Pfad auf Mac Mini ist `/usr/local/bin/docker` (nicht im Standard-SSH-PATH).
|
||||
**WICHTIG:** Immer `-f` mit vollem Pfad zur docker-compose.yml nutzen, `cd` in SSH funktioniert nicht!
|
||||
|
||||
### Frontend-Entwicklung
|
||||
|
||||
|
||||
79
.env.coolify.example
Normal file
79
.env.coolify.example
Normal file
@@ -0,0 +1,79 @@
|
||||
# =========================================================
|
||||
# BreakPilot Lehrer — Coolify Environment Variables
|
||||
# =========================================================
|
||||
# Copy these into Coolify's environment variable UI
|
||||
# for the breakpilot-lehrer Docker Compose resource.
|
||||
# =========================================================
|
||||
|
||||
# --- External PostgreSQL (Coolify-managed, same as Core) ---
|
||||
POSTGRES_HOST=<coolify-postgres-hostname>
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USER=breakpilot
|
||||
POSTGRES_PASSWORD=CHANGE_ME_SAME_AS_CORE
|
||||
POSTGRES_DB=breakpilot_db
|
||||
|
||||
# --- Security ---
|
||||
JWT_SECRET=CHANGE_ME_SAME_AS_CORE
|
||||
|
||||
# --- External S3 Storage (same as Core) ---
|
||||
S3_ENDPOINT=<s3-endpoint-host:port>
|
||||
S3_ACCESS_KEY=CHANGE_ME_SAME_AS_CORE
|
||||
S3_SECRET_KEY=CHANGE_ME_SAME_AS_CORE
|
||||
S3_BUCKET=breakpilot-rag
|
||||
S3_SECURE=true
|
||||
|
||||
# --- External Qdrant (Coolify-managed, same as Core) ---
|
||||
QDRANT_URL=http://<coolify-qdrant-hostname>:6333
|
||||
|
||||
# --- Session ---
|
||||
SESSION_TTL_HOURS=24
|
||||
|
||||
# --- SMTP (Real mail server) ---
|
||||
SMTP_HOST=smtp.example.com
|
||||
SMTP_PORT=587
|
||||
SMTP_USERNAME=noreply@breakpilot.ai
|
||||
SMTP_PASSWORD=CHANGE_ME_SMTP_PASSWORD
|
||||
SMTP_FROM_NAME=BreakPilot
|
||||
SMTP_FROM_ADDR=noreply@breakpilot.ai
|
||||
|
||||
# --- LLM / Ollama (optional) ---
|
||||
OLLAMA_BASE_URL=
|
||||
OLLAMA_URL=
|
||||
OLLAMA_ENABLED=false
|
||||
OLLAMA_DEFAULT_MODEL=
|
||||
OLLAMA_VISION_MODEL=
|
||||
OLLAMA_CORRECTION_MODEL=
|
||||
OLLAMA_TIMEOUT=120
|
||||
|
||||
# --- Anthropic (optional) ---
|
||||
ANTHROPIC_API_KEY=
|
||||
|
||||
# --- vast.ai GPU (optional) ---
|
||||
VAST_API_KEY=
|
||||
VAST_INSTANCE_ID=
|
||||
|
||||
# --- Game Settings ---
|
||||
GAME_USE_DATABASE=true
|
||||
GAME_REQUIRE_AUTH=true
|
||||
GAME_REQUIRE_BILLING=true
|
||||
GAME_LLM_MODEL=
|
||||
|
||||
# --- Frontend URLs (build args) ---
|
||||
NEXT_PUBLIC_API_URL=https://api-lehrer.breakpilot.ai
|
||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL=https://klausur.breakpilot.ai
|
||||
NEXT_PUBLIC_VOICE_SERVICE_URL=wss://voice.breakpilot.ai
|
||||
NEXT_PUBLIC_BILLING_API_URL=https://api-core.breakpilot.ai
|
||||
NEXT_PUBLIC_APP_URL=https://app.breakpilot.ai
|
||||
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=
|
||||
|
||||
# --- Edu Search ---
|
||||
EDU_SEARCH_URL=
|
||||
EDU_SEARCH_API_KEY=
|
||||
OPENSEARCH_PASSWORD=CHANGE_ME_OPENSEARCH_PASSWORD
|
||||
|
||||
# --- Misc ---
|
||||
CONTROL_API_KEY=
|
||||
ALERTS_AGENT_ENABLED=false
|
||||
PADDLEOCR_SERVICE_URL=
|
||||
TROCR_SERVICE_URL=
|
||||
CAMUNDA_URL=
|
||||
17
.env.example
17
.env.example
@@ -30,23 +30,6 @@ OLLAMA_VISION_MODEL=llama3.2-vision
|
||||
OLLAMA_CORRECTION_MODEL=llama3.2
|
||||
OLLAMA_TIMEOUT=120
|
||||
|
||||
# OCR-Pipeline: LLM-Review (Schritt 6)
|
||||
# Kleine Modelle reichen fuer Zeichen-Korrekturen (0->O, 1->l, 5->S)
|
||||
# Optionen: qwen3:0.6b, qwen3:1.7b, gemma3:1b, qwen3.5:35b-a3b
|
||||
OLLAMA_REVIEW_MODEL=qwen3:0.6b
|
||||
# Eintraege pro Ollama-Call. Groesser = weniger HTTP-Overhead.
|
||||
OLLAMA_REVIEW_BATCH_SIZE=20
|
||||
|
||||
# OCR-Pipeline: Engine fuer Schritt 5 (Worterkennung)
|
||||
# Optionen: auto (bevorzugt RapidOCR), rapid, tesseract,
|
||||
# trocr-printed, trocr-handwritten, lighton
|
||||
OCR_ENGINE=auto
|
||||
|
||||
# Klausur-HTR: Primaeres Modell fuer Handschriftenerkennung (qwen2.5vl bereits auf Mac Mini)
|
||||
OLLAMA_HTR_MODEL=qwen2.5vl:32b
|
||||
# HTR Fallback: genutzt wenn Ollama nicht erreichbar (auto-download ~340 MB)
|
||||
HTR_FALLBACK_MODEL=trocr-large
|
||||
|
||||
# Anthropic (optional)
|
||||
ANTHROPIC_API_KEY=
|
||||
|
||||
|
||||
32
.gitea/workflows/deploy-coolify.yml
Normal file
32
.gitea/workflows/deploy-coolify.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
# Triggers a redeploy of the breakpilot-lehrer resource through the Coolify
# HTTP API whenever the `coolify` branch is pushed.
name: Deploy to Coolify

on:
  push:
    branches:
      - coolify

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      # Fixed delay before deploying; there is no health check behind it.
      - name: Wait for Core deployment
        run: |
          echo "Waiting 30s for Core services to stabilize..."
          sleep 30

      - name: Deploy via Coolify API
        # Secrets are handed to the script as environment variables instead of
        # being template-expanded into the script text, so a quote or `$` in a
        # secret value cannot alter the shell command or the JSON payload shape.
        env:
          COOLIFY_API_TOKEN: ${{ secrets.COOLIFY_API_TOKEN }}
          COOLIFY_RESOURCE_UUID: ${{ secrets.COOLIFY_RESOURCE_UUID }}
          COOLIFY_BASE_URL: ${{ secrets.COOLIFY_BASE_URL }}
        run: |
          # Fail fast with a readable message when a secret is not configured.
          : "${COOLIFY_API_TOKEN:?secret COOLIFY_API_TOKEN is not set}"
          : "${COOLIFY_RESOURCE_UUID:?secret COOLIFY_RESOURCE_UUID is not set}"
          : "${COOLIFY_BASE_URL:?secret COOLIFY_BASE_URL is not set}"

          echo "Deploying breakpilot-lehrer to Coolify..."
          # NOTE(review): Coolify's documented deploy parameter appears to be
          # `force`; confirm that `force_rebuild` is actually honoured.
          # `|| true` keeps the script alive on a transport error so the status
          # check below can report it (curl prints 000 in that case).
          HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
            --connect-timeout 10 --max-time 60 \
            -X POST \
            -H "Authorization: Bearer ${COOLIFY_API_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"uuid\": \"${COOLIFY_RESOURCE_UUID}\", \"force_rebuild\": true}" \
            "${COOLIFY_BASE_URL}/api/v1/deploy") || true

          echo "HTTP Status: $HTTP_STATUS"
          case "$HTTP_STATUS" in
            200|201)
              echo "Deployment triggered successfully!"
              ;;
            *)
              echo "Deployment failed with status $HTTP_STATUS"
              exit 1
              ;;
          esac
|
||||
@@ -34,8 +34,8 @@ WORKDIR /app
|
||||
ENV NODE_ENV=production
|
||||
|
||||
# Create non-root user
|
||||
RUN addgroup --system --gid 1001 nodejs
|
||||
RUN adduser --system --uid 1001 nextjs
|
||||
RUN addgroup -S -g 1001 nodejs
|
||||
RUN adduser -S -u 1001 -G nodejs nextjs
|
||||
|
||||
# Copy built assets
|
||||
COPY --from=builder /app/public ./public
|
||||
|
||||
@@ -273,6 +273,52 @@ Dein Ziel ist die rechtzeitige Erkennung und Kommunikation relevanter Ereignisse
|
||||
createdAt: '2024-12-01T00:00:00Z',
|
||||
updatedAt: '2025-01-12T02:00:00Z'
|
||||
},
|
||||
'compliance-advisor': {
|
||||
id: 'compliance-advisor',
|
||||
name: 'Compliance Advisor',
|
||||
description: 'DSGVO/Compliance-Berater fuer SDK-Nutzer',
|
||||
soulFile: 'compliance-advisor.soul.md',
|
||||
soulContent: `# Compliance Advisor Agent
|
||||
|
||||
## Identitaet
|
||||
Du bist der BreakPilot Compliance-Berater. Du hilfst Nutzern des AI Compliance SDK,
|
||||
Datenschutz- und Compliance-Fragen in verstaendlicher Sprache zu beantworten.
|
||||
Du bist kein Anwalt und gibst keine Rechtsberatung, sondern orientierst dich an
|
||||
offiziellen Quellen und gibst praxisnahe Hinweise.
|
||||
|
||||
## Kernprinzipien
|
||||
- **Quellenbasiert**: Verweise immer auf konkrete Rechtsgrundlagen (DSGVO-Artikel, BDSG-Paragraphen)
|
||||
- **Verstaendlich**: Erklaere rechtliche Konzepte in einfacher, praxisnaher Sprache
|
||||
- **Ehrlich**: Bei Unsicherheit empfehle professionelle Rechtsberatung
|
||||
- **Kontextbewusst**: Nutze das RAG-System fuer aktuelle Rechtstexte und Leitfaeden
|
||||
- **Scope-bewusst**: Nutze alle verfuegbaren RAG-Quellen AUSSER NIBIS-Dokumenten
|
||||
|
||||
## Kompetenzbereich
|
||||
- DSGVO Art. 1-99 + Erwaegsgruende
|
||||
- BDSG (Bundesdatenschutzgesetz)
|
||||
- AI Act (EU KI-Verordnung)
|
||||
- TTDSG, ePrivacy-Richtlinie
|
||||
- DSK-Kurzpapiere (Nr. 1-20)
|
||||
- SDM V3.0, BSI-Grundschutz, BSI-TR-03161
|
||||
- EDPB Guidelines, Bundes-/Laender-Muss-Listen
|
||||
- ISO 27001/27701 (Ueberblick)
|
||||
|
||||
## Kommunikationsstil
|
||||
- Sachlich, aber verstaendlich
|
||||
- Deutsch als Hauptsprache
|
||||
- Strukturierte Antworten mit Quellenangabe
|
||||
- Praxisbeispiele wo hilfreich`,
|
||||
color: '#6366f1',
|
||||
status: 'running',
|
||||
activeSessions: 0,
|
||||
totalProcessed: 0,
|
||||
avgResponseTime: 0,
|
||||
errorRate: 0,
|
||||
lastRestart: new Date().toISOString(),
|
||||
version: '1.0.0',
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString()
|
||||
},
|
||||
'orchestrator': {
|
||||
id: 'orchestrator',
|
||||
name: 'Orchestrator',
|
||||
|
||||
@@ -94,6 +94,19 @@ const mockAgents: AgentConfig[] = [
|
||||
totalProcessed: 8934,
|
||||
avgResponseTime: 12,
|
||||
lastActivity: 'just now'
|
||||
},
|
||||
{
|
||||
id: 'compliance-advisor',
|
||||
name: 'Compliance Advisor',
|
||||
description: 'DSGVO/Compliance-Berater fuer SDK-Nutzer',
|
||||
soulFile: 'compliance-advisor.soul.md',
|
||||
color: '#6366f1',
|
||||
icon: 'message',
|
||||
status: 'running',
|
||||
activeSessions: 0,
|
||||
totalProcessed: 0,
|
||||
avgResponseTime: 0,
|
||||
lastActivity: new Date().toISOString()
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@@ -179,6 +179,7 @@ export default function GPUInfrastructurePage() {
|
||||
databases: ['PostgreSQL (Logs)'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
|
||||
{ name: 'Test Quality (BQAS)', href: '/ai/test-quality', description: 'Golden Suite & Tests' },
|
||||
{ name: 'Magic Help', href: '/ai/magic-help', description: 'TrOCR Testing' },
|
||||
]}
|
||||
|
||||
503
admin-lehrer/app/(admin)/ai/llm-compare/page.tsx
Normal file
503
admin-lehrer/app/(admin)/ai/llm-compare/page.tsx
Normal file
@@ -0,0 +1,503 @@
|
||||
'use client'
|
||||
|
||||
/**
|
||||
* LLM Comparison Tool
|
||||
*
|
||||
* Vergleicht Antworten von verschiedenen LLM-Providern:
|
||||
* - OpenAI/ChatGPT
|
||||
* - Claude
|
||||
* - Self-hosted + Tavily
|
||||
* - Self-hosted + EduSearch
|
||||
*/
|
||||
|
||||
import { useState, useEffect, useCallback } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
|
||||
|
||||
/** One provider's answer inside a comparison run, as returned by the gateway. */
interface LLMResponse {
  /** Provider key; also used to look up display colours and labels. */
  provider: string
  /** Model identifier reported for this answer. */
  model: string
  /** Generated answer text. */
  response: string
  /** Latency in milliseconds. */
  latency_ms: number
  /** Token count, when the provider reports one. */
  tokens_used?: number
  /** Search hits attached to the answer, if any. */
  search_results?: {
    title: string
    url: string
    content: string
    score?: number
  }[]
  /** Error message; when set, the UI shows it instead of the answer text. */
  error?: string
  timestamp: string
}
|
||||
|
||||
/** A complete comparison run: the prompt that was sent plus every provider's answer. */
interface ComparisonResult {
  /** Identifier of the run; used as the React key in the history list. */
  comparison_id: string
  /** User prompt that was compared. */
  prompt: string
  /** System prompt used for the run, if one was sent. */
  system_prompt?: string
  /** One entry per provider that answered (or failed). */
  responses: LLMResponse[]
  /** Creation time; parsed with `new Date(...)` for display. */
  created_at: string
}
|
||||
|
||||
/**
 * Tailwind class triplets (background, border, text) per provider key.
 * The class names are kept as complete string literals on purpose.
 */
const providerColors: Record<string, { bg: string; border: string; text: string }> = {
  openai: {
    bg: 'bg-emerald-50',
    border: 'border-emerald-300',
    text: 'text-emerald-700',
  },
  claude: {
    bg: 'bg-orange-50',
    border: 'border-orange-300',
    text: 'text-orange-700',
  },
  selfhosted_tavily: {
    bg: 'bg-blue-50',
    border: 'border-blue-300',
    text: 'text-blue-700',
  },
  selfhosted_edusearch: {
    bg: 'bg-purple-50',
    border: 'border-purple-300',
    text: 'text-purple-700',
  },
}
|
||||
|
||||
/** Human-readable card titles per provider key. */
const providerLabels: Record<string, string> = {
  'openai': 'OpenAI GPT-4o-mini',
  'claude': 'Claude 3.5 Sonnet',
  'selfhosted_tavily': 'Self-hosted + Tavily',
  'selfhosted_edusearch': 'Self-hosted + EduSearch',
}
|
||||
|
||||
/** Colours used for a provider key that has no entry in `providerColors`. */
const FALLBACK_PROVIDER_COLORS = {
  bg: 'bg-slate-50',
  border: 'border-slate-300',
  text: 'text-slate-700',
}

/**
 * Returns `url` only when it is an absolute http(s) URL, otherwise `undefined`.
 * Search-result URLs come from external services, so schemes such as
 * `javascript:` must never end up in an `href`.
 */
function toSafeHref(url: unknown): string | undefined {
  return typeof url === 'string' && /^https?:\/\//i.test(url) ? url : undefined
}

/** Chevron icon that flips when the owning panel is open. */
function Chevron({ open }: { open: boolean }) {
  return (
    <svg
      className={`w-5 h-5 transition-transform ${open ? 'rotate-180' : ''}`}
      fill="none"
      stroke="currentColor"
      viewBox="0 0 24 24"
    >
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
    </svg>
  )
}

/**
 * Card showing one provider's answer (or its error), latency, token count and
 * an expandable list of search results.
 *
 * Declared at module level so its identity is stable across renders of the
 * page; a component defined inside the page body would be remounted on every
 * state change, which collapses an open `<details>` element.
 */
function ResponseCard({ response }: { response: LLMResponse }) {
  const colors = providerColors[response.provider] || FALLBACK_PROVIDER_COLORS
  const label = providerLabels[response.provider] || response.provider
  const searchResults = response.search_results ?? []

  return (
    <div className={`rounded-xl border-2 ${colors.border} ${colors.bg} overflow-hidden`}>
      <div className={`px-4 py-3 border-b ${colors.border} flex items-center justify-between`}>
        <div>
          <h3 className={`font-semibold ${colors.text}`}>{label}</h3>
          <p className="text-xs text-slate-500">{response.model}</p>
        </div>
        <div className="text-right text-xs text-slate-500">
          <div>{response.latency_ms}ms</div>
          {/* Explicit number check: `0 && <div/>` would render a bare "0". */}
          {typeof response.tokens_used === 'number' && <div>{response.tokens_used} tokens</div>}
        </div>
      </div>

      <div className="p-4">
        {response.error ? (
          <div className="text-red-600 text-sm">
            <strong>Fehler:</strong> {response.error}
          </div>
        ) : (
          <pre className="whitespace-pre-wrap text-sm text-slate-700 font-sans">
            {response.response}
          </pre>
        )}
      </div>

      {searchResults.length > 0 && (
        <div className="px-4 pb-4">
          <details className="text-xs">
            <summary className="cursor-pointer text-slate-500 hover:text-slate-700">
              {searchResults.length} Suchergebnisse anzeigen
            </summary>
            <ul className="mt-2 space-y-2">
              {searchResults.map((sr, idx) => (
                <li key={idx} className="bg-white rounded p-2 border border-slate-200">
                  <a
                    href={toSafeHref(sr.url)}
                    target="_blank"
                    rel="noopener noreferrer"
                    className="text-blue-600 hover:underline font-medium"
                  >
                    {sr.title || 'Untitled'}
                  </a>
                  <p className="text-slate-500 truncate">{sr.content}</p>
                </li>
              ))}
            </ul>
          </details>
        </div>
      )}
    </div>
  )
}

/**
 * LLM comparison page.
 *
 * Sends one prompt (plus optional system prompt) to the comparison endpoint of
 * the LLM gateway for the selected providers, renders the answers side by side
 * and lists earlier comparisons that can be re-opened.
 */
export default function LLMComparePage() {
  // Prompt inputs
  const [prompt, setPrompt] = useState('')
  const [systemPrompt, setSystemPrompt] = useState('Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland.')

  // Provider toggles
  const [enableOpenAI, setEnableOpenAI] = useState(true)
  const [enableClaude, setEnableClaude] = useState(true)
  const [enableTavily, setEnableTavily] = useState(true)
  const [enableEduSearch, setEnableEduSearch] = useState(true)

  // Generation parameters
  const [model, setModel] = useState('llama3.2:3b')
  const [temperature, setTemperature] = useState(0.7)
  const [maxTokens, setMaxTokens] = useState(2048)

  // Results
  const [isLoading, setIsLoading] = useState(false)
  const [result, setResult] = useState<ComparisonResult | null>(null)
  const [history, setHistory] = useState<ComparisonResult[]>([])
  const [error, setError] = useState<string | null>(null)

  // Collapsible panels
  const [showSettings, setShowSettings] = useState(false)
  const [showHistory, setShowHistory] = useState(false)

  // Gateway location and credentials.
  // SECURITY NOTE(review): NEXT_PUBLIC_* variables are inlined into the browser
  // bundle, so this API key is readable by every visitor and must not be
  // treated as a secret. Consider proxying the calls through a server route.
  const API_URL = process.env.NEXT_PUBLIC_LLM_GATEWAY_URL || 'http://localhost:8082'
  const API_KEY = process.env.NEXT_PUBLIC_LLM_API_KEY || 'dev-key'

  const providerToggles: Array<{ label: string; checked: boolean; onToggle: (on: boolean) => void }> = [
    { label: 'OpenAI', checked: enableOpenAI, onToggle: setEnableOpenAI },
    { label: 'Claude', checked: enableClaude, onToggle: setEnableClaude },
    { label: 'Self + Tavily', checked: enableTavily, onToggle: setEnableTavily },
    { label: 'Self + EduSearch', checked: enableEduSearch, onToggle: setEnableEduSearch },
  ]

  /** Fetches the 20 most recent comparisons; failures are logged, not shown. */
  const loadHistory = useCallback(async () => {
    try {
      const response = await fetch(`${API_URL}/v1/comparison/history?limit=20`, {
        headers: { Authorization: `Bearer ${API_KEY}` },
      })
      if (response.ok) {
        const data = await response.json()
        setHistory(data.comparisons || [])
      }
    } catch (e) {
      console.error('Failed to load history:', e)
    }
  }, [API_URL, API_KEY])

  useEffect(() => {
    loadHistory()
  }, [loadHistory])

  /** Validates the form, posts the comparison request and stores the result. */
  const runComparison = async () => {
    if (!prompt.trim()) {
      setError('Bitte geben Sie einen Prompt ein')
      return
    }
    // A run without any provider cannot produce an answer; stop before the request.
    if (!providerToggles.some((toggle) => toggle.checked)) {
      setError('Bitte waehlen Sie mindestens einen Provider aus')
      return
    }

    setIsLoading(true)
    setError(null)
    setResult(null)

    try {
      const response = await fetch(`${API_URL}/v1/comparison/run`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${API_KEY}`,
        },
        body: JSON.stringify({
          prompt,
          // An empty system prompt is omitted from the payload.
          system_prompt: systemPrompt || undefined,
          enable_openai: enableOpenAI,
          enable_claude: enableClaude,
          enable_selfhosted_tavily: enableTavily,
          enable_selfhosted_edusearch: enableEduSearch,
          selfhosted_model: model,
          temperature,
          max_tokens: maxTokens,
        }),
      })

      if (!response.ok) {
        throw new Error(`API Error: ${response.status}`)
      }

      const data = await response.json()
      setResult(data)
      // Refresh the history list so the new run shows up.
      loadHistory()
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setIsLoading(false)
    }
  }

  return (
    <div>
      {/* Page Purpose */}
      <PagePurpose
        title="LLM Vergleich"
        purpose="Vergleichen Sie Antworten verschiedener KI-Provider (OpenAI, Claude, Self-hosted) fuer Qualitaetssicherung. Optimieren Sie Parameter und System Prompts fuer beste Ergebnisse. Standalone-Werkzeug ohne direkten Datenfluss zur KI-Pipeline."
        audience={['Entwickler', 'Data Scientists', 'QA']}
        architecture={{
          services: ['llm-gateway (Python)', 'Ollama', 'OpenAI API', 'Claude API'],
          databases: ['PostgreSQL (History)', 'Qdrant (RAG)'],
        }}
        relatedPages={[
          { name: 'Test Quality (BQAS)', href: '/ai/test-quality', description: 'Golden Suite & Synthetic Tests' },
          { name: 'GPU Infrastruktur', href: '/ai/gpu', description: 'GPU-Ressourcen verwalten' },
          { name: 'Agent Management', href: '/ai/agents', description: 'Multi-Agent System' },
        ]}
        collapsible={true}
        defaultCollapsed={true}
      />

      {/* AI tools sidebar */}
      <AIToolsSidebarResponsive currentTool="llm-compare" />

      <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
        {/* Left column: input and settings */}
        <div className="lg:col-span-1 space-y-4">
          {/* Prompt input */}
          <div className="bg-white rounded-xl border border-slate-200 p-4">
            <h2 className="font-semibold text-slate-900 mb-3">Prompt</h2>

            {/* System prompt */}
            <div className="mb-3">
              <label className="block text-sm text-slate-600 mb-1">System Prompt</label>
              <textarea
                value={systemPrompt}
                onChange={(e) => setSystemPrompt(e.target.value)}
                rows={3}
                className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm resize-none"
                placeholder="System Prompt (optional)"
              />
            </div>

            {/* User prompt */}
            <div className="mb-3">
              <label className="block text-sm text-slate-600 mb-1">User Prompt</label>
              <textarea
                value={prompt}
                onChange={(e) => setPrompt(e.target.value)}
                rows={4}
                className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm resize-none"
                placeholder="z.B.: Erstelle ein Arbeitsblatt zum Thema Bruchrechnung fuer Klasse 6..."
              />
            </div>

            {/* Provider toggles */}
            <div className="mb-4">
              <label className="block text-sm text-slate-600 mb-2">Provider</label>
              <div className="grid grid-cols-2 gap-2">
                {providerToggles.map((toggle) => (
                  <label key={toggle.label} className="flex items-center gap-2 text-sm">
                    <input
                      type="checkbox"
                      checked={toggle.checked}
                      onChange={(e) => toggle.onToggle(e.target.checked)}
                      className="rounded"
                    />
                    {toggle.label}
                  </label>
                ))}
              </div>
            </div>

            {/* Run button */}
            <button
              onClick={runComparison}
              disabled={isLoading || !prompt.trim()}
              className="w-full py-3 bg-teal-600 text-white rounded-lg font-medium hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed"
            >
              {isLoading ? (
                <span className="flex items-center justify-center gap-2">
                  <svg className="animate-spin w-5 h-5" fill="none" viewBox="0 0 24 24">
                    <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                    <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
                  </svg>
                  Vergleiche...
                </span>
              ) : (
                'Vergleich starten'
              )}
            </button>

            {error && (
              <div className="mt-3 p-3 bg-red-50 border border-red-200 rounded-lg text-red-700 text-sm">
                {error}
              </div>
            )}
          </div>

          {/* Settings panel */}
          <div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
            <button
              onClick={() => setShowSettings(!showSettings)}
              className="w-full px-4 py-3 flex items-center justify-between hover:bg-slate-50"
            >
              <span className="font-semibold text-slate-900">Parameter</span>
              <Chevron open={showSettings} />
            </button>

            {showSettings && (
              <div className="p-4 border-t border-slate-200 space-y-4">
                <div>
                  <label className="block text-sm text-slate-600 mb-1">Self-hosted Modell</label>
                  <select
                    value={model}
                    onChange={(e) => setModel(e.target.value)}
                    className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm"
                  >
                    <option value="llama3.2:3b">Llama 3.2 3B</option>
                    <option value="llama3.1:8b">Llama 3.1 8B</option>
                    <option value="mistral:7b">Mistral 7B</option>
                    <option value="qwen2.5:7b">Qwen 2.5 7B</option>
                  </select>
                </div>

                <div>
                  <label className="block text-sm text-slate-600 mb-1">
                    Temperature: {temperature.toFixed(2)}
                  </label>
                  <input
                    type="range"
                    min="0"
                    max="2"
                    step="0.1"
                    value={temperature}
                    onChange={(e) => setTemperature(parseFloat(e.target.value))}
                    className="w-full"
                  />
                </div>

                <div>
                  <label className="block text-sm text-slate-600 mb-1">Max Tokens: {maxTokens}</label>
                  <input
                    type="range"
                    min="256"
                    max="4096"
                    step="256"
                    value={maxTokens}
                    onChange={(e) => setMaxTokens(parseInt(e.target.value, 10))}
                    className="w-full"
                  />
                </div>
              </div>
            )}
          </div>

          {/* History panel */}
          <div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
            <button
              onClick={() => setShowHistory(!showHistory)}
              className="w-full px-4 py-3 flex items-center justify-between hover:bg-slate-50"
            >
              <span className="font-semibold text-slate-900">Verlauf ({history.length})</span>
              <Chevron open={showHistory} />
            </button>

            {showHistory && history.length > 0 && (
              <div className="border-t border-slate-200 max-h-64 overflow-y-auto">
                {history.map((h) => (
                  <button
                    key={h.comparison_id}
                    onClick={() => {
                      // Re-open a stored run and restore the prompts that produced it.
                      setResult(h)
                      setPrompt(h.prompt)
                      if (h.system_prompt) setSystemPrompt(h.system_prompt)
                    }}
                    className="w-full px-4 py-2 text-left hover:bg-slate-50 border-b border-slate-100 last:border-0"
                  >
                    <div className="text-sm text-slate-700 truncate">{h.prompt}</div>
                    <div className="text-xs text-slate-400">
                      {new Date(h.created_at).toLocaleString('de-DE')}
                    </div>
                  </button>
                ))}
              </div>
            )}
          </div>
        </div>

        {/* Right column: results */}
        <div className="lg:col-span-2">
          {result ? (
            <div className="space-y-4">
              <div className="bg-white rounded-xl border border-slate-200 p-4">
                <div className="flex items-center justify-between">
                  <div>
                    <h2 className="font-semibold text-slate-900">Ergebnisse</h2>
                    <p className="text-sm text-slate-500">ID: {result.comparison_id}</p>
                  </div>
                  <div className="text-sm text-slate-500">
                    {new Date(result.created_at).toLocaleString('de-DE')}
                  </div>
                </div>
                <div className="mt-2 p-3 bg-slate-50 rounded-lg">
                  <p className="text-sm text-slate-700">{result.prompt}</p>
                </div>
              </div>

              <div className="grid grid-cols-1 xl:grid-cols-2 gap-4">
                {result.responses.map((response, idx) => (
                  <ResponseCard key={`${response.provider}-${idx}`} response={response} />
                ))}
              </div>
            </div>
          ) : (
            <div className="bg-white rounded-xl border border-slate-200 p-12 text-center">
              <svg
                className="w-16 h-16 mx-auto text-slate-300 mb-4"
                fill="none"
                stroke="currentColor"
                viewBox="0 0 24 24"
              >
                <path
                  strokeLinecap="round"
                  strokeLinejoin="round"
                  strokeWidth={1.5}
                  d="M9 3v2m6-2v2M9 19v2m6-2v2M5 9H3m2 6H3m18-6h-2m2 6h-2M7 19h10a2 2 0 002-2V7a2 2 0 00-2-2H7a2 2 0 00-2 2v10a2 2 0 002 2zM9 9h6v6H9V9z"
                />
              </svg>
              <h3 className="text-lg font-medium text-slate-700 mb-2">LLM-Vergleich starten</h3>
              <p className="text-slate-500 max-w-md mx-auto">
                Geben Sie einen Prompt ein und klicken Sie auf &quot;Vergleich starten&quot;, um
                die Antworten verschiedener LLM-Provider zu vergleichen.
              </p>
            </div>
          )}
        </div>
      </div>

      {/* Info box */}
      <div className="mt-8 bg-teal-50 border border-teal-200 rounded-xl p-6">
        <div className="flex items-start gap-4">
          <svg className="w-6 h-6 text-teal-600 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
          </svg>
          <div>
            <h3 className="font-semibold text-teal-900">Qualitaetssicherung</h3>
            <p className="text-sm text-teal-800 mt-1">
              Dieses Tool dient zur Qualitaetssicherung der KI-Antworten. Vergleichen Sie verschiedene Provider,
              um die optimalen Parameter und System Prompts zu finden. Die Ergebnisse werden fuer Audits gespeichert.
            </p>
          </div>
        </div>
      </div>
    </div>
  )
}
|
||||
@@ -685,6 +685,7 @@ export default function OCRComparePage() {
|
||||
databases: ['PostgreSQL (Sessions)'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider vergleichen' },
|
||||
{ name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Ground Truth erstellen' },
|
||||
]}
|
||||
collapsible={true}
|
||||
|
||||
@@ -1,548 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
||||
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
|
||||
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
|
||||
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
|
||||
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
||||
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||
import { GridEditor } from '@/components/grid-editor/GridEditor'
|
||||
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
export default function OcrOverlayPage() {
|
||||
const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline')
|
||||
const [currentStep, setCurrentStep] = useState(0)
|
||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||
const [sessionName, setSessionName] = useState<string>('')
|
||||
const [sessions, setSessions] = useState<SessionListItem[]>([])
|
||||
const [loadingSessions, setLoadingSessions] = useState(true)
|
||||
const [editingName, setEditingName] = useState<string | null>(null)
|
||||
const [editNameValue, setEditNameValue] = useState('')
|
||||
const [editingCategory, setEditingCategory] = useState<string | null>(null)
|
||||
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
|
||||
const [steps, setSteps] = useState<PipelineStep[]>(
|
||||
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i === 0 ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
loadSessions()
|
||||
}, [])
|
||||
|
||||
// Fetches the session list and stores only top-level sessions
// (entries that carry no parent_session_id). The loading flag is
// raised for the duration of the request and always cleared afterwards.
const loadSessions = async () => {
  setLoadingSessions(true)
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
    if (!response.ok) return
    const payload = await response.json()
    const topLevel = (payload.sessions || []).filter(
      (item: SessionListItem) => !item.parent_session_id,
    )
    setSessions(topLevel)
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoadingSessions(false)
  }
}
|
||||
|
||||
// Loads one session from the API and restores the UI for it: name,
// category, processing mode (derived from word_result.ocr_engine) and
// the stepper position.
const openSession = useCallback(async (sid: string) => {
  // Stepper state with every step before `active` marked completed.
  const markSteps = (base: PipelineStep[], active: number): PipelineStep[] =>
    base.map((step, idx) => ({
      ...step,
      status: idx < active ? 'completed' : idx === active ? 'active' : 'pending',
    }))

  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
    if (!res.ok) return
    const data = await res.json()

    setSessionId(sid)
    setSessionName(data.name || data.filename || '')
    setActiveCategory(data.document_category || undefined)

    const engine = data.word_result?.ocr_engine
    const kombiSession = engine === 'kombi' || engine === 'rapid_kombi'
    const paddleSession = engine === 'paddle_direct'

    if (!kombiSession && !paddleSession) {
      // Classic pipeline: translate the persisted DB step into a UI index.
      setMode('pipeline')
      const resumeAt = dbStepToOverlayUi(data.current_step || 1)
      setSteps(markSteps(OVERLAY_PIPELINE_STEPS, resumeAt))
      setCurrentStep(resumeAt)
      return
    }

    setMode(kombiSession ? 'kombi' : 'paddle-direct')

    // Paddle Direct always resumes at its OCR step (4). Kombi resumes at
    // the grid editor (6) when a grid or structure result exists, and at
    // the structure step (5) when only a word result exists.
    let activeStep = 4
    if (kombiSession) {
      if (data.grid_editor_result || data.structure_result) {
        activeStep = 6
      } else if (data.word_result) {
        activeStep = 5
      }
    }

    setSteps(markSteps(kombiSession ? KOMBI_STEPS : PADDLE_DIRECT_STEPS, activeStep))
    setCurrentStep(activeStep)
  } catch (e) {
    console.error('Failed to open session:', e)
  }
}, [])
|
||||
|
||||
// Deletes a session on the server and, only if the server confirmed the
// deletion, removes it from the local list. When the deleted session is
// the one currently open, the stepper is reset to the first step of the
// active mode.
const deleteSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
    // fetch() resolves on HTTP error statuses, so check explicitly;
    // otherwise the list would drop a session that still exists remotely.
    if (!res.ok) {
      console.error('Failed to delete session:', res.status)
      return
    }
    setSessions((prev) => prev.filter((s) => s.id !== sid))
    if (sessionId === sid) {
      setSessionId(null)
      setCurrentStep(0)
      const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
      setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    }
  } catch (e) {
    console.error('Failed to delete session:', e)
  }
}, [sessionId, mode])
|
||||
|
||||
// Persists a new session name via PUT and mirrors it into local state
// only after the server accepted it. Inline edit mode is closed in every
// case (success, HTTP error, or network failure).
const renameSession = useCallback(async (sid: string, newName: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: newName }),
    })
    // fetch() resolves on HTTP error statuses; do not show a name the
    // backend did not store.
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
      if (sessionId === sid) setSessionName(newName)
    } else {
      console.error('Failed to rename session:', res.status)
    }
  } catch (e) {
    console.error('Failed to rename session:', e)
  }
  setEditingName(null)
}, [sessionId])
|
||||
|
||||
// Persists a new document category via PUT and mirrors it into local
// state only after the server accepted it. The category dropdown is
// closed in every case.
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ document_category: category }),
    })
    // fetch() resolves on HTTP error statuses; keep the old badge when
    // the backend rejected the change.
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
      if (sessionId === sid) setActiveCategory(category)
    } else {
      console.error('Failed to update category:', res.status)
    }
  } catch (e) {
    console.error('Failed to update category:', e)
  }
  setEditingCategory(null)
}, [sessionId])
|
||||
|
||||
// Stepper click handler: a step may be opened when it is not ahead of
// the current step, or when it is already marked completed.
const handleStepClick = (index: number) => {
  const reachable = index <= currentStep || steps[index].status === 'completed'
  if (!reachable) return
  setCurrentStep(index)
}
|
||||
|
||||
// Jumps to `step` and rewrites all statuses relative to it:
// earlier steps become completed, the target active, later ones pending.
const goToStep = (step: number) => {
  setCurrentStep(step)
  setSteps((previous) =>
    previous.map((entry, position) => {
      if (position < step) return { ...entry, status: 'completed' }
      if (position === step) return { ...entry, status: 'active' }
      return { ...entry, status: 'pending' }
    }),
  )
}
|
||||
|
||||
// Advances the stepper by one. On the last step the wizard is reset to
// the first step of the active mode, the session is closed and the
// session list is reloaded.
const handleNext = () => {
  const onLastStep = currentStep >= steps.length - 1

  if (!onLastStep) {
    const upcoming = currentStep + 1
    setSteps((previous) =>
      previous.map((entry, position) => {
        switch (position) {
          case currentStep:
            return { ...entry, status: 'completed' }
          case upcoming:
            return { ...entry, status: 'active' }
          default:
            return entry
        }
      }),
    )
    setCurrentStep(upcoming)
    return
  }

  // Last step completed — return to session list
  let template = OVERLAY_PIPELINE_STEPS
  if (mode === 'kombi') {
    template = KOMBI_STEPS
  } else if (mode === 'paddle-direct') {
    template = PADDLE_DIRECT_STEPS
  }
  setSteps(template.map((entry, position) => ({ ...entry, status: position === 0 ? 'active' : 'pending' })))
  setCurrentStep(0)
  setSessionId(null)
  loadSessions()
}
|
||||
|
||||
// Completion callback of the orientation step; receives the id of the
// session that step worked on.
const handleOrientationComplete = (sid: string) => {
  // Remember the session first so the following steps operate on it.
  setSessionId(sid)
  // Refresh the list, then move the stepper forward.
  loadSessions()
  handleNext()
}
|
||||
|
||||
// Clears the active session and rewinds the stepper to the first step
// of whichever mode is currently selected.
const handleNewSession = () => {
  let template = OVERLAY_PIPELINE_STEPS
  if (mode === 'kombi') {
    template = KOMBI_STEPS
  } else if (mode === 'paddle-direct') {
    template = PADDLE_DIRECT_STEPS
  }

  setSessionId(null)
  setSessionName('')
  setCurrentStep(0)
  setSteps(template.map((entry, position) => ({ ...entry, status: position === 0 ? 'active' : 'pending' })))
}
|
||||
|
||||
// Display names for the confirmation dialog, keyed by 1-based step number.
const stepNames: Record<number, string> = {
  1: 'Orientierung',
  2: 'Begradigung',
  3: 'Entzerrung',
  4: 'Zuschneiden',
  5: 'Zeilen',
  6: 'Woerter',
  7: 'Overlay',
}

// Asks for confirmation, then requests the backend to reprocess the
// active session starting at the DB step that corresponds to `uiStep`.
// On success the stepper jumps to that step; failures are only logged.
const reprocessFromStep = useCallback(async (uiStep: number) => {
  if (!sessionId) return

  // Map overlay UI step to DB step
  const dbStepMap: Record<number, number> = { 0: 2, 1: 3, 2: 4, 3: 5, 4: 7, 5: 8, 6: 9 }
  const dbStep = dbStepMap[uiStep] || uiStep + 1

  const confirmed = confirm(`Ab Schritt ${uiStep + 1} (${stepNames[uiStep + 1] || '?'}) neu verarbeiten?`)
  if (!confirmed) return

  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ from_step: dbStep }),
    })
    if (res.ok) {
      goToStep(uiStep)
      return
    }
    // Error body is optional; fall back to the bare status code.
    const data = await res.json().catch(() => ({}))
    console.error('Reprocess failed:', data.detail || res.status)
  } catch (e) {
    console.error('Reprocess error:', e)
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId, goToStep])
|
||||
|
||||
// Picks the component for the current step. Steps 0-3 (orientation,
// deskew, dewarp, crop) are the same in every mode; from step 4 on the
// component depends on the selected mode.
const renderStep = () => {
  if (currentStep === 0) {
    return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
  }
  if (currentStep === 1) {
    return <StepDeskew sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 2) {
    return <StepDewarp sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 3) {
    return <StepCrop sessionId={sessionId} onNext={handleNext} />
  }

  if (mode === 'pipeline') {
    if (currentStep === 4) {
      return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
    }
    if (currentStep === 5) {
      return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} skipHealGaps />
    }
    if (currentStep === 6) {
      return <OverlayReconstruction sessionId={sessionId} onNext={handleNext} />
    }
    return null
  }

  if (mode === 'kombi') {
    if (currentStep === 4) {
      return (
        <PaddleDirectStep
          sessionId={sessionId}
          onNext={handleNext}
          endpoint="paddle-kombi"
          title="Kombi-Modus"
          description="PP-OCRv5 und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
          icon="🔀"
          buttonLabel="PP-OCRv5 + Tesseract starten"
          runningLabel="PP-OCRv5 + Tesseract laufen..."
          engineKey="kombi"
        />
      )
    }
    if (currentStep === 5) {
      return <StepStructureDetection sessionId={sessionId} onNext={handleNext} />
    }
    if (currentStep === 6) {
      return <GridEditor sessionId={sessionId} onNext={handleNext} />
    }
    return null
  }

  // Paddle Direct: a single OCR step after pre-processing.
  if (currentStep === 4) {
    return <PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
  }
  return null
}
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<PagePurpose
|
||||
title="OCR Overlay"
|
||||
purpose="Ganzseitige Overlay-Rekonstruktion: Scan begradigen, Zeilen und Woerter erkennen, dann pixelgenau ueber das Bild legen. Ohne Spaltenerkennung — ideal fuer Arbeitsblaetter."
|
||||
audience={['Entwickler']}
|
||||
architecture={{
|
||||
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
|
||||
databases: ['PostgreSQL Sessions'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'OCR Pipeline', href: '/ai/ocr-pipeline', description: 'Volle Pipeline mit Spalten' },
|
||||
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
|
||||
]}
|
||||
defaultCollapsed
|
||||
/>
|
||||
|
||||
{/* Session List */}
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
|
||||
<div className="flex items-center justify-between mb-3">
|
||||
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Sessions ({sessions.length})
|
||||
</h3>
|
||||
<button
|
||||
onClick={handleNewSession}
|
||||
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
|
||||
>
|
||||
+ Neue Session
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{loadingSessions ? (
|
||||
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
|
||||
) : sessions.length === 0 ? (
|
||||
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
|
||||
) : (
|
||||
<div className="space-y-1.5 max-h-[320px] overflow-y-auto">
|
||||
{sessions.map((s) => {
|
||||
const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
|
||||
return (
|
||||
<div
|
||||
key={s.id}
|
||||
className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
|
||||
sessionId === s.id
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
{/* Thumbnail */}
|
||||
<div
|
||||
className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
|
||||
onClick={() => openSession(s.id)}
|
||||
>
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
|
||||
alt=""
|
||||
className="w-full h-full object-cover"
|
||||
loading="lazy"
|
||||
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Info */}
|
||||
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
|
||||
{editingName === s.id ? (
|
||||
<input
|
||||
autoFocus
|
||||
value={editNameValue}
|
||||
onChange={(e) => setEditNameValue(e.target.value)}
|
||||
onBlur={() => renameSession(s.id, editNameValue)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Enter') renameSession(s.id, editNameValue)
|
||||
if (e.key === 'Escape') setEditingName(null)
|
||||
}}
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
|
||||
/>
|
||||
) : (
|
||||
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
|
||||
{s.name || s.filename}
|
||||
</div>
|
||||
)}
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
navigator.clipboard.writeText(s.id)
|
||||
const btn = e.currentTarget
|
||||
btn.textContent = 'Kopiert!'
|
||||
setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
|
||||
}}
|
||||
className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
|
||||
title={`Volle ID: ${s.id} — Klick zum Kopieren`}
|
||||
>
|
||||
ID: {s.id.slice(0, 8)}
|
||||
</button>
|
||||
<div className="text-xs text-gray-400 flex gap-2 mt-0.5">
|
||||
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Category Badge */}
|
||||
<div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
|
||||
<button
|
||||
onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
|
||||
className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
|
||||
catInfo
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
|
||||
: 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
|
||||
}`}
|
||||
title="Kategorie setzen"
|
||||
>
|
||||
{catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Actions */}
|
||||
<div className="flex flex-col gap-0.5 flex-shrink-0">
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
setEditNameValue(s.name || s.filename)
|
||||
setEditingName(s.id)
|
||||
}}
|
||||
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
|
||||
title="Umbenennen"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
if (confirm('Session loeschen?')) deleteSession(s.id)
|
||||
}}
|
||||
className="p-1 text-gray-400 hover:text-red-500"
|
||||
title="Loeschen"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Category dropdown */}
|
||||
{editingCategory === s.id && (
|
||||
<div
|
||||
className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
{DOCUMENT_CATEGORIES.map((cat) => (
|
||||
<button
|
||||
key={cat.value}
|
||||
onClick={() => updateCategory(s.id, cat.value)}
|
||||
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
|
||||
s.document_category === cat.value
|
||||
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
|
||||
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}
|
||||
>
|
||||
{cat.icon} {cat.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Active session info */}
|
||||
{sessionId && sessionName && (
|
||||
<div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
|
||||
<span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
|
||||
{activeCategory && (() => {
|
||||
const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
|
||||
return cat ? <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{cat.icon} {cat.label}</span> : null
|
||||
})()}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Mode Toggle */}
|
||||
<div className="flex items-center gap-1 bg-gray-100 dark:bg-gray-800 rounded-lg p-1 w-fit">
|
||||
<button
|
||||
onClick={() => {
|
||||
if (mode === 'pipeline') return
|
||||
setMode('pipeline')
|
||||
setCurrentStep(0)
|
||||
setSessionId(null)
|
||||
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}}
|
||||
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
||||
mode === 'pipeline'
|
||||
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
||||
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||
}`}
|
||||
>
|
||||
Pipeline (7 Schritte)
|
||||
</button>
|
||||
<button
|
||||
onClick={() => {
|
||||
if (mode === 'paddle-direct') return
|
||||
setMode('paddle-direct')
|
||||
setCurrentStep(0)
|
||||
setSessionId(null)
|
||||
setSteps(PADDLE_DIRECT_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}}
|
||||
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
||||
mode === 'paddle-direct'
|
||||
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
||||
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||
}`}
|
||||
>
|
||||
PP-OCRv5 Direct (5 Schritte)
|
||||
</button>
|
||||
<button
|
||||
onClick={() => {
|
||||
if (mode === 'kombi') return
|
||||
setMode('kombi')
|
||||
setCurrentStep(0)
|
||||
setSessionId(null)
|
||||
setSteps(KOMBI_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}}
|
||||
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
||||
mode === 'kombi'
|
||||
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
||||
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||
}`}
|
||||
>
|
||||
Kombi (7 Schritte)
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<PipelineStepper
|
||||
steps={steps}
|
||||
currentStep={currentStep}
|
||||
onStepClick={handleStepClick}
|
||||
onReprocess={mode === 'pipeline' && sessionId != null ? reprocessFromStep : undefined}
|
||||
/>
|
||||
|
||||
<div className="min-h-[400px]">{renderStep()}</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
import type { PipelineStep } from '../ocr-pipeline/types'
|
||||
|
||||
// Re-export types used by overlay components
|
||||
export type {
|
||||
PipelineStep,
|
||||
PipelineStepStatus,
|
||||
SessionListItem,
|
||||
SessionInfo,
|
||||
DocumentCategory,
|
||||
DocumentTypeResult,
|
||||
OrientationResult,
|
||||
CropResult,
|
||||
DeskewResult,
|
||||
DewarpResult,
|
||||
RowResult,
|
||||
RowItem,
|
||||
GridResult,
|
||||
GridCell,
|
||||
OcrWordBox,
|
||||
WordBbox,
|
||||
ColumnMeta,
|
||||
} from '../ocr-pipeline/types'
|
||||
|
||||
export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types'
|
||||
|
||||
/**
 * Step list of the full-page overlay reconstruction wizard (7 entries).
 * Compared with the full OCR pipeline it leaves out column detection,
 * LLM review and ground-truth validation.
 */
export const OVERLAY_PIPELINE_STEPS: PipelineStep[] = [
  {
    id: 'orientation',
    name: 'Orientierung',
    icon: '🔄',
    status: 'pending',
  },
  {
    id: 'deskew',
    name: 'Begradigung',
    icon: '📐',
    status: 'pending',
  },
  {
    id: 'dewarp',
    name: 'Entzerrung',
    icon: '🔧',
    status: 'pending',
  },
  {
    id: 'crop',
    name: 'Zuschneiden',
    icon: '✂️',
    status: 'pending',
  },
  {
    id: 'rows',
    name: 'Zeilen',
    icon: '📏',
    status: 'pending',
  },
  {
    id: 'words',
    name: 'Woerter',
    icon: '🔤',
    status: 'pending',
  },
  {
    id: 'reconstruction',
    name: 'Overlay',
    icon: '🏗️',
    status: 'pending',
  },
]
|
||||
|
||||
/**
 * Map from overlay UI step index (0-based) to DB step number.
 *
 * DB numbering: 1=start, 2=orientation, 3=deskew, 4=dewarp, 5=crop,
 * 6=columns, 7=rows, 8=words, 9=reconstruction. The overlay flow has no
 * column step, so DB step 6 is never a target here. This table is the
 * inverse of `dbStepToOverlayUi` for the steps the overlay flow uses.
 */
export const OVERLAY_UI_TO_DB: Record<number, number> = {
  0: 2, // orientation
  1: 3, // deskew
  2: 4, // dewarp
  3: 5, // crop
  4: 7, // rows (DB step 6 = columns is skipped)
  5: 8, // words
  6: 9, // reconstruction
}
|
||||
|
||||
/**
 * Step list for Paddle Direct mode (5 entries): the four pre-processing
 * steps (orientation, deskew, dewarp, crop) followed by a single
 * PP-OCRv5 + overlay step.
 */
export const PADDLE_DIRECT_STEPS: PipelineStep[] = [
  {
    id: 'orientation',
    name: 'Orientierung',
    icon: '🔄',
    status: 'pending',
  },
  {
    id: 'deskew',
    name: 'Begradigung',
    icon: '📐',
    status: 'pending',
  },
  {
    id: 'dewarp',
    name: 'Entzerrung',
    icon: '🔧',
    status: 'pending',
  },
  {
    id: 'crop',
    name: 'Zuschneiden',
    icon: '✂️',
    status: 'pending',
  },
  {
    id: 'paddle-direct',
    name: 'PP-OCRv5 + Overlay',
    icon: '⚡',
    status: 'pending',
  },
]
|
||||
|
||||
/**
 * 7-step pipeline for Kombi mode (PP-OCRv5 + Tesseract).
 * Four pre-processing steps (orientation, deskew, dewarp, crop), then the
 * combined OCR step, followed by structure detection and the grid
 * (table) editor.
 */
export const KOMBI_STEPS: PipelineStep[] = [
  { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
  { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
  { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
  { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
  { id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: '🔀', status: 'pending' },
  { id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' },
  { id: 'grid-editor', name: 'Tabelle', icon: '📊', status: 'pending' },
]
|
||||
|
||||
/**
 * Translates a persisted DB step number into the 0-based index of the
 * overlay stepper.
 *
 * DB numbering: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop,
 * 6=columns, 7=rows, 8=words, 9=recon, 10=gt. The overlay flow has no
 * column step, so DB steps 6 and 7 both land on the rows step; anything
 * beyond words lands on reconstruction.
 */
export function dbStepToOverlayUi(dbStep: number): number {
  if (dbStep <= 2) return 0 // start / orientation

  switch (dbStep) {
    case 3:
      return 1 // deskew
    case 4:
      return 2 // dewarp
    case 5:
      return 3 // crop
    case 8:
      return 5 // words
  }

  // Remaining values: up to 7 is rows (columns folded in), the rest is reconstruction.
  return dbStep <= 7 ? 4 : 6
}
|
||||
@@ -1,624 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
||||
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
|
||||
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
|
||||
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
|
||||
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
||||
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
|
||||
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
||||
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
||||
import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs'
|
||||
import { PIPELINE_STEPS, DOCUMENT_CATEGORIES, type PipelineStep, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
export default function OcrPipelinePage() {
|
||||
const [currentStep, setCurrentStep] = useState(0)
|
||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||
const [sessionName, setSessionName] = useState<string>('')
|
||||
const [sessions, setSessions] = useState<SessionListItem[]>([])
|
||||
const [loadingSessions, setLoadingSessions] = useState(true)
|
||||
const [editingName, setEditingName] = useState<string | null>(null)
|
||||
const [editNameValue, setEditNameValue] = useState('')
|
||||
const [editingCategory, setEditingCategory] = useState<string | null>(null)
|
||||
const [docTypeResult, setDocTypeResult] = useState<DocumentTypeResult | null>(null)
|
||||
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
|
||||
const [subSessions, setSubSessions] = useState<SubSession[]>([])
|
||||
const [parentSessionId, setParentSessionId] = useState<string | null>(null)
|
||||
const [steps, setSteps] = useState<PipelineStep[]>(
|
||||
PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i === 0 ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
|
||||
// Load session list on mount
|
||||
useEffect(() => {
|
||||
loadSessions()
|
||||
}, [])
|
||||
|
||||
// Fetches the complete session list from the API into local state.
// The loading flag is raised for the duration of the request and always
// cleared afterwards; a non-OK response leaves the list unchanged.
const loadSessions = async () => {
  setLoadingSessions(true)
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
    if (!response.ok) return
    const payload = await response.json()
    setSessions(payload.sessions || [])
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoadingSessions(false)
  }
}
|
||||
|
||||
// Loads one session from the API and restores the UI for it: name,
// category, sub-session tabs, document-type result and stepper position.
// `keepSubSessions` prevents the sub-session list from being cleared
// when the opened session has neither children nor a parent.
const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
    if (!res.ok) return
    const data = await res.json()

    setSessionId(sid)
    setSessionName(data.name || data.filename || '')
    setActiveCategory(data.document_category || undefined)

    // Sub-session handling
    const hasChildren = data.sub_sessions && data.sub_sessions.length > 0
    if (hasChildren) {
      setSubSessions(data.sub_sessions)
      setParentSessionId(sid)
    } else if (data.parent_session_id) {
      // This is a sub-session: remember the parent, leave the tab list as is.
      setParentSessionId(data.parent_session_id)
    } else if (!keepSubSessions) {
      setSubSessions([])
      setParentSessionId(null)
    }

    // Restore doc type result if available
    const savedDocType: DocumentTypeResult | null = data.doc_type_result || null
    setDocTypeResult(savedDocType)

    // DB steps are 1-based with 1=start; UI steps are 0-based with
    // 0=orientation, so the UI index is the DB step minus one.
    let resumeAt = Math.max(0, (data.current_step || 1) - 1)

    // Sub-sessions additionally skip the four pre-processing steps and
    // start no earlier than the columns step (index 4).
    const docSkips = savedDocType?.skip_steps || []
    const preprocessing = ['orientation', 'deskew', 'dewarp', 'crop']
    const belongsToParent = !!data.parent_session_id
    const skipped = belongsToParent
      ? [...docSkips, ...preprocessing.filter((id) => !docSkips.includes(id))]
      : [...docSkips]
    if (belongsToParent && resumeAt < 4) {
      resumeAt = 4
    }

    setSteps(
      PIPELINE_STEPS.map((step, idx) => {
        if (skipped.includes(step.id)) return { ...step, status: 'skipped' }
        if (idx < resumeAt) return { ...step, status: 'completed' }
        if (idx === resumeAt) return { ...step, status: 'active' }
        return { ...step, status: 'pending' }
      }),
    )
    setCurrentStep(resumeAt)
  } catch (e) {
    console.error('Failed to open session:', e)
  }
}, [])
|
||||
|
||||
// Deletes a session on the server and, only if the server confirmed the
// deletion, removes it from the local list. When the deleted session is
// the one currently open, the wizard state (step, doc type, sub-sessions)
// is reset.
const deleteSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
    // fetch() resolves on HTTP error statuses, so check explicitly;
    // otherwise the list would drop a session that still exists remotely.
    if (!res.ok) {
      console.error('Failed to delete session:', res.status)
      return
    }
    setSessions((prev) => prev.filter((s) => s.id !== sid))
    if (sessionId === sid) {
      setSessionId(null)
      setCurrentStep(0)
      setDocTypeResult(null)
      setSubSessions([])
      setParentSessionId(null)
      setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    }
  } catch (e) {
    console.error('Failed to delete session:', e)
  }
}, [sessionId])
|
||||
|
||||
// Persists a new session name via PUT and mirrors it into local state
// only after the server accepted it. Inline edit mode is closed in every
// case (success, HTTP error, or network failure).
const renameSession = useCallback(async (sid: string, newName: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: newName }),
    })
    // fetch() resolves on HTTP error statuses; do not show a name the
    // backend did not store.
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
      if (sessionId === sid) setSessionName(newName)
    } else {
      console.error('Failed to rename session:', res.status)
    }
  } catch (e) {
    console.error('Failed to rename session:', e)
  }
  setEditingName(null)
}, [sessionId])
|
||||
|
||||
// Persists a new document category via PUT and mirrors it into local
// state only after the server accepted it. The category dropdown is
// closed in every case.
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ document_category: category }),
    })
    // fetch() resolves on HTTP error statuses; keep the old badge when
    // the backend rejected the change.
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
      if (sessionId === sid) setActiveCategory(category)
    } else {
      console.error('Failed to update category:', res.status)
    }
  } catch (e) {
    console.error('Failed to update category:', e)
  }
  setEditingCategory(null)
}, [sessionId])
|
||||
|
||||
// Deletes every session after an explicit confirmation. Local state is
// cleared only once the server confirmed the bulk delete.
const deleteAllSessions = useCallback(async () => {
  if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' })
    // fetch() resolves on HTTP error statuses; without this check a
    // rejected request would still empty the visible list.
    if (!res.ok) {
      console.error('Failed to delete all sessions:', res.status)
      return
    }
    setSessions([])
    setSessionId(null)
    setCurrentStep(0)
    setDocTypeResult(null)
    setActiveCategory(undefined)
    setSubSessions([])
    setParentSessionId(null)
    setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
  } catch (e) {
    console.error('Failed to delete all sessions:', e)
  }
}, [])
|
||||
|
||||
/**
 * Stepper click handler: jump to `index` if it is the current step, an earlier
 * step, or a later step already marked completed. Other clicks are ignored.
 */
const handleStepClick = (index: number) => {
  const notAhead = index <= currentStep
  if (!notAhead && steps[index].status !== 'completed') return
  setCurrentStep(index)
}
|
||||
|
||||
/**
 * Jump to `step` and rewrite all step statuses relative to it:
 * earlier steps become completed, `step` becomes active, later steps pending.
 */
const goToStep = (step: number) => {
  setCurrentStep(step)
  setSteps((previous) =>
    previous.map((entry, position) => {
      if (position < step) return { ...entry, status: 'completed' }
      if (position === step) return { ...entry, status: 'active' }
      return { ...entry, status: 'pending' }
    }),
  )
}
|
||||
|
||||
/**
 * Advance the pipeline by one step.
 *
 * - On the last step of a sub-session: mark it completed and reopen the parent.
 * - On the last step of a main session: reset the page and reload the list.
 * - Otherwise: move to the next step that is not listed in
 *   `docTypeResult.skip_steps`, marking the steps jumped over as skipped.
 */
const handleNext = () => {
  const finalIndex = steps.length - 1

  if (currentStep >= finalIndex) {
    const inSubSession = parentSessionId && sessionId !== parentSessionId
    if (inSubSession) {
      // Sub-session finished: record that in the tab list, then return to the parent.
      // NOTE(review): current_step is hard-coded to 10 although 11 steps are defined — confirm.
      setSubSessions((previous) =>
        previous.map((sub) =>
          sub.id === sessionId ? { ...sub, status: 'completed', current_step: 10 } : sub,
        ),
      )
      handleSessionChange(parentSessionId)
      return
    }

    // Main session finished: back to the session list with a fresh stepper.
    setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    setCurrentStep(0)
    setSessionId(null)
    setSubSessions([])
    setParentSessionId(null)
    loadSessions()
    return
  }

  // Walk forward past every step the detected document type wants skipped.
  const skipSteps = docTypeResult?.skip_steps || []
  let candidate = currentStep + 1
  for (; candidate < steps.length; candidate++) {
    if (!skipSteps.includes(PIPELINE_STEPS[candidate]?.id)) break
  }
  // If everything after the current step is skipped, land on the last step.
  const nextStep = Math.min(candidate, finalIndex)

  setSteps((previous) =>
    previous.map((entry, position) => {
      if (position === currentStep) return { ...entry, status: 'completed' }
      if (position === nextStep) return { ...entry, status: 'active' }
      const jumpedOver = position > currentStep && position < nextStep
      if (jumpedOver && skipSteps.includes(PIPELINE_STEPS[position]?.id)) {
        return { ...entry, status: 'skipped' }
      }
      return entry
    }),
  )
  setCurrentStep(nextStep)
}
|
||||
|
||||
/**
 * Called by the orientation step with the id of the session it worked on:
 * adopts that id, refreshes the session list and advances the pipeline.
 */
const handleOrientationComplete = (sid: string) => {
  setSessionId(sid)
  loadSessions() // refresh the list so the new session appears in it
  handleNext()
}
|
||||
|
||||
/**
 * Finish the crop step: ask the backend to detect the document type, store the
 * result, mark the steps it wants skipped and advance to the next step.
 *
 * Detection is best-effort. If it fails, or there is no session, the pipeline
 * advances via `handleNext()` using whatever `docTypeResult` is already in state.
 *
 * When detection succeeds the advance is computed here from the fresh response:
 * `handleNext()` reads `docTypeResult` from the render closure, which still
 * holds the value from before `setDocTypeResult`. Calling it would ignore the
 * new `skip_steps` and re-mark the first skipped step as active.
 */
const handleCropNext = async () => {
  // Skip list from a successful detection in this call; null means none available.
  let detectedSkipSteps: string[] | null = null

  if (sessionId) {
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-type`,
        { method: 'POST' },
      )
      if (res.ok) {
        const data: DocumentTypeResult = await res.json()
        setDocTypeResult(data)
        detectedSkipSteps = data.skip_steps || []
      }
    } catch (e) {
      console.error('Doc type detection failed:', e)
      // Not critical — continue without it
    }
  }

  const lastIndex = steps.length - 1
  if (detectedSkipSteps === null || currentStep >= lastIndex) {
    // No fresh result (or already on the last step): the regular path is correct.
    handleNext()
    return
  }

  // Same advance rule as handleNext, driven by the fresh skip list.
  const skipSteps = detectedSkipSteps
  let candidate = currentStep + 1
  while (candidate < steps.length && skipSteps.includes(PIPELINE_STEPS[candidate]?.id)) {
    candidate++
  }
  const nextStep = candidate >= steps.length ? lastIndex : candidate

  setSteps((prev) =>
    prev.map((s, i) => {
      if (i === currentStep) return { ...s, status: 'completed' }
      if (i === nextStep) return { ...s, status: 'active' }
      // Mark every step named in the skip list, wherever it sits in the pipeline.
      if (skipSteps.includes(s.id)) return { ...s, status: 'skipped' }
      return s
    }),
  )
  setCurrentStep(nextStep)
}
|
||||
|
||||
/**
 * Manually override the detected document type.
 *
 * Does nothing while no detection result exists. Otherwise stores a copy of the
 * result with the new type, its skip list and pipeline, and re-syncs step
 * statuses: listed steps become skipped, previously skipped ones become pending.
 */
const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => {
  if (!docTypeResult) return

  // Only 'full_text' skips steps; vocab_table and generic_table run every step.
  const isFullText = newDocType === 'full_text'
  const skipSteps: string[] = isFullText ? ['columns', 'rows'] : []

  const updated: DocumentTypeResult = {
    ...docTypeResult,
    doc_type: newDocType,
    skip_steps: skipSteps,
    pipeline: isFullText ? 'full_page' : 'cell_first',
  }
  setDocTypeResult(updated)

  setSteps((previous) =>
    previous.map((entry) => {
      const mustSkip = skipSteps.includes(entry.id)
      if (!mustSkip && entry.status !== 'skipped') return entry
      return { ...entry, status: mustSkip ? ('skipped' as const) : ('pending' as const) }
    }),
  )
}
|
||||
|
||||
/**
 * Reset the page for a new session: no active session or name, no detection
 * result, no sub-sessions, and a stepper with only the first step active.
 */
const handleNewSession = () => {
  const freshSteps = PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))

  // Session identity
  setSessionId(null)
  setSessionName('')

  // Pipeline progress
  setCurrentStep(0)
  setDocTypeResult(null)

  // Box sub-sessions
  setSubSessions([])
  setParentSessionId(null)

  setSteps(freshSteps)
}
|
||||
|
||||
/** Switch to another session by delegating to `openSession(id, true)`. */
const handleSessionChange = useCallback(
  (newSessionId: string) => {
    openSession(newSessionId, true)
  },
  [openSession],
)
|
||||
|
||||
/**
 * Store the sub-sessions reported by the column step and, if a session is
 * active, remember it as their parent.
 */
const handleBoxSessionsCreated = useCallback((subs: SubSession[]) => {
  setSubSessions(subs)
  if (!sessionId) return
  setParentSessionId(sessionId)
}, [sessionId])
|
||||
|
||||
// Display labels keyed by 1-indexed step number (1..11), built from an ordered
// list so the numbering cannot get out of sync with the order.
const stepNames: Record<number, string> = [
  'Orientierung',
  'Begradigung',
  'Entzerrung',
  'Zuschneiden',
  'Spalten',
  'Zeilen',
  'Woerter',
  'Struktur',
  'Korrektur',
  'Rekonstruktion',
  'Validierung',
].reduce<Record<number, string>>((labels, label, position) => {
  labels[position + 1] = label
  return labels
}, {})
|
||||
|
||||
/**
 * Ask the backend to reprocess the active session from a given step.
 *
 * @param uiStep 0-indexed step position in the UI; sent as `uiStep + 1`
 *
 * Does nothing without an active session or when the user declines the
 * confirmation. On success the stepper is rewound to `uiStep`; failures are
 * only logged.
 */
const reprocessFromStep = useCallback(async (uiStep: number) => {
  if (!sessionId) return

  // UI is 0-indexed, DB is 1-indexed
  const dbStep = uiStep + 1
  const confirmed = confirm(`Ab Schritt ${dbStep} (${stepNames[dbStep] || '?'}) neu verarbeiten? Nachfolgende Daten werden geloescht.`)
  if (!confirmed) return

  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ from_step: dbStep }),
    })
    if (res.ok) {
      // Reset UI steps
      goToStep(uiStep)
      return
    }
    // Error body may not be JSON; fall back to an empty object.
    const data = await res.json().catch(() => ({}))
    console.error('Reprocess failed:', data.detail || res.status)
  } catch (e) {
    console.error('Reprocess error:', e)
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId, goToStep])
|
||||
|
||||
/**
 * Render the step component for `currentStep` (0..10), or nothing for any
 * other value. Orientation and crop use their own completion handlers; the
 * other steps advance via `handleNext`.
 */
const renderStep = () => {
  if (currentStep === 0) return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
  if (currentStep === 1) return <StepDeskew sessionId={sessionId} onNext={handleNext} />
  if (currentStep === 2) return <StepDewarp sessionId={sessionId} onNext={handleNext} />
  if (currentStep === 3) return <StepCrop sessionId={sessionId} onNext={handleCropNext} />
  if (currentStep === 4) {
    return <StepColumnDetection sessionId={sessionId} onNext={handleNext} onBoxSessionsCreated={handleBoxSessionsCreated} />
  }
  if (currentStep === 5) return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
  if (currentStep === 6) return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
  if (currentStep === 7) return <StepStructureDetection sessionId={sessionId} onNext={handleNext} />
  if (currentStep === 8) return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
  if (currentStep === 9) return <StepReconstruction sessionId={sessionId} onNext={handleNext} />
  if (currentStep === 10) return <StepGroundTruth sessionId={sessionId} onNext={handleNext} />
  return null
}
|
||||
|
||||
// Page layout: purpose header, session list (open / rename / categorize /
// delete), active-session summary, stepper, optional sub-session tabs, and the
// component of the current step.
return (
  <div className="space-y-6">
    <PagePurpose
      title="OCR Pipeline"
      purpose="Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. Ziel: 10 Vokabelseiten fehlerfrei rekonstruieren."
      audience={['Entwickler', 'Data Scientists']}
      architecture={{
        services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
        databases: ['PostgreSQL Sessions'],
      }}
      relatedPages={[
        { name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
        { name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Trainingsdaten' },
      ]}
      defaultCollapsed
    />

    {/* Session List */}
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
      <div className="flex items-center justify-between mb-3">
        <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
          Sessions ({sessions.length})
        </h3>
        <div className="flex gap-2">
          {sessions.length > 0 && (
            <button
              onClick={deleteAllSessions}
              className="text-xs px-3 py-1.5 text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-colors"
              title="Alle Sessions loeschen"
            >
              Alle loeschen
            </button>
          )}
          <button
            onClick={handleNewSession}
            className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
          >
            + Neue Session
          </button>
        </div>
      </div>

      {loadingSessions ? (
        <div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
      ) : sessions.length === 0 ? (
        <div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
      ) : (
        <div className="space-y-1.5 max-h-[320px] overflow-y-auto">
          {sessions.map((s) => {
            const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
            return (
              <div
                key={s.id}
                className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
                  sessionId === s.id
                    ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
                    : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
                }`}
              >
                {/* Thumbnail */}
                <div
                  className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
                  onClick={() => openSession(s.id)}
                >
                  {/* eslint-disable-next-line @next/next/no-img-element */}
                  <img
                    src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
                    alt=""
                    className="w-full h-full object-cover"
                    loading="lazy"
                    onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
                  />
                </div>

                {/* Info */}
                <div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
                  {editingName === s.id ? (
                    <input
                      autoFocus
                      value={editNameValue}
                      onChange={(e) => setEditNameValue(e.target.value)}
                      onBlur={() => renameSession(s.id, editNameValue)}
                      onKeyDown={(e) => {
                        if (e.key === 'Enter') renameSession(s.id, editNameValue)
                        if (e.key === 'Escape') setEditingName(null)
                      }}
                      onClick={(e) => e.stopPropagation()}
                      className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
                    />
                  ) : (
                    <div className="truncate font-medium text-gray-700 dark:text-gray-300">
                      {s.name || s.filename}
                    </div>
                  )}
                  {/* ID row */}
                  <button
                    onClick={(e) => {
                      e.stopPropagation()
                      // Capture the element now; currentTarget is not available in async callbacks.
                      const btn = e.currentTarget
                      // navigator.clipboard is undefined outside secure contexts (plain HTTP
                      // on a non-localhost host) and writeText() may reject. Only show
                      // "Kopiert!" once the copy actually succeeded.
                      const copying = navigator.clipboard?.writeText(s.id)
                      if (!copying) {
                        console.error('Failed to copy session id: Clipboard API unavailable')
                        return
                      }
                      copying
                        .then(() => {
                          btn.textContent = 'Kopiert!'
                          setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
                        })
                        .catch((err) => {
                          console.error('Failed to copy session id:', err)
                        })
                    }}
                    className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
                    title={`Volle ID: ${s.id} — Klick zum Kopieren`}
                  >
                    ID: {s.id.slice(0, 8)}
                  </button>
                  <div className="text-xs text-gray-400 flex gap-2 mt-0.5">
                    <span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
                    <span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
                  </div>
                </div>

                {/* Badges */}
                <div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
                  {/* Category Badge */}
                  <button
                    onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
                    className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
                      catInfo
                        ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
                        : 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
                    }`}
                    title="Kategorie setzen"
                  >
                    {catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
                  </button>
                  {/* Doc Type Badge (read-only) */}
                  {s.doc_type && (
                    <span className="text-[10px] px-1.5 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
                      {s.doc_type}
                    </span>
                  )}
                </div>

                {/* Action buttons */}
                <div className="flex flex-col gap-0.5 flex-shrink-0">
                  <button
                    onClick={(e) => {
                      e.stopPropagation()
                      setEditNameValue(s.name || s.filename)
                      setEditingName(s.id)
                    }}
                    className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
                    title="Umbenennen"
                  >
                    <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
                      <path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
                    </svg>
                  </button>
                  <button
                    onClick={(e) => {
                      e.stopPropagation()
                      if (confirm('Session loeschen?')) deleteSession(s.id)
                    }}
                    className="p-1 text-gray-400 hover:text-red-500"
                    title="Loeschen"
                  >
                    <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
                      <path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
                    </svg>
                  </button>
                </div>

                {/* Category dropdown (inline) */}
                {editingCategory === s.id && (
                  <div
                    className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
                    onClick={(e) => e.stopPropagation()}
                  >
                    {DOCUMENT_CATEGORIES.map((cat) => (
                      <button
                        key={cat.value}
                        onClick={() => updateCategory(s.id, cat.value)}
                        className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
                          s.document_category === cat.value
                            ? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
                            : 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
                        }`}
                      >
                        {cat.icon} {cat.label}
                      </button>
                    ))}
                  </div>
                )}
              </div>
            )
          })}
        </div>
      )}
    </div>

    {/* Active session info */}
    {sessionId && sessionName && (
      <div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
        <span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
        {activeCategory && (() => {
          const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
          return cat ? <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{cat.icon} {cat.label}</span> : null
        })()}
        {docTypeResult && (
          <span className="text-xs px-2 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
            {docTypeResult.doc_type}
          </span>
        )}
      </div>
    )}

    <PipelineStepper
      steps={steps}
      currentStep={currentStep}
      onStepClick={handleStepClick}
      onReprocess={sessionId ? reprocessFromStep : undefined}
      docTypeResult={docTypeResult}
      onDocTypeChange={handleDocTypeChange}
    />

    {subSessions.length > 0 && parentSessionId && sessionId && (
      <BoxSessionTabs
        parentSessionId={parentSessionId}
        subSessions={subSessions}
        activeSessionId={sessionId}
        onSessionChange={handleSessionChange}
      />
    )}

    <div className="min-h-[400px]">{renderStep()}</div>
  </div>
)
|
||||
}
|
||||
@@ -1,403 +0,0 @@
|
||||
/** Status values a pipeline step can carry. */
export type PipelineStepStatus = 'pending' | 'active' | 'completed' | 'failed' | 'skipped'
|
||||
|
||||
/** One step of the pipeline: string id, display name, icon and current status. */
export interface PipelineStep {
  id: string
  name: string
  icon: string
  status: PipelineStepStatus
}
|
||||
|
||||
/** User-assignable document category; labels and icons live in DOCUMENT_CATEGORIES. */
export type DocumentCategory =
  | 'vokabelseite'
  | 'buchseite'
  | 'arbeitsblatt'
  | 'klausurseite'
  | 'mathearbeit'
  | 'statistik'
  | 'zeitung'
  | 'formular'
  | 'handschrift'
  | 'sonstiges'
|
||||
|
||||
/** Display metadata (label and icon) for every DocumentCategory, in menu order. */
export const DOCUMENT_CATEGORIES: { value: DocumentCategory; label: string; icon: string }[] = [
  { value: 'vokabelseite', label: 'Vokabelseite', icon: '📖' },
  { value: 'buchseite',    label: 'Buchseite',    icon: '📚' },
  { value: 'arbeitsblatt', label: 'Arbeitsblatt', icon: '📝' },
  { value: 'klausurseite', label: 'Klausurseite', icon: '📄' },
  { value: 'mathearbeit',  label: 'Mathearbeit',  icon: '🔢' },
  { value: 'statistik',    label: 'Statistik',    icon: '📊' },
  { value: 'zeitung',      label: 'Zeitung',      icon: '📰' },
  { value: 'formular',     label: 'Formular',     icon: '📋' },
  { value: 'handschrift',  label: 'Handschrift',  icon: '✍️' },
  { value: 'sonstiges',    label: 'Sonstiges',    icon: '📎' },
]
|
||||
|
||||
/**
 * Summary record of one OCR session.
 * NOTE(review): presumably the item shape of the session-list endpoint — confirm against the backend.
 */
export interface SessionListItem {
  id: string
  name: string
  filename: string
  status: string
  current_step: number
  document_category?: DocumentCategory
  doc_type?: string
  created_at: string
  updated_at?: string
  parent_session_id?: string | null
  box_index?: number | null
}
|
||||
|
||||
/** A sub-session belonging to a parent session, identified by its box index. */
export interface SubSession {
  id: string
  name: string
  box_index: number
  current_step?: number
  status?: string
}
|
||||
|
||||
/** Log record for one executed pipeline step. */
export interface PipelineLogEntry {
  step: string
  completed_at: string
  success: boolean
  duration_ms?: number
  metrics: Record<string, unknown>
}
|
||||
|
||||
/** Collection of per-step log entries. */
export interface PipelineLog {
  steps: PipelineLogEntry[]
}
|
||||
|
||||
/**
 * Document-type detection result: detected type, the pipeline variant to run,
 * and the ids of steps to skip.
 */
export interface DocumentTypeResult {
  doc_type: 'vocab_table' | 'full_text' | 'generic_table'
  confidence: number
  pipeline: 'cell_first' | 'full_page'
  skip_steps: string[]
  features?: Record<string, unknown>
  duration_seconds?: number
}
|
||||
|
||||
/** Result record of the orientation step. */
export interface OrientationResult {
  orientation_degrees: number
  corrected: boolean
  duration_seconds: number
}
|
||||
|
||||
/**
 * Result record of the crop step.
 * NOTE(review): `crop_rect` vs `crop_rect_pct` look like pixel vs percent variants — confirm units with the backend.
 */
export interface CropResult {
  crop_applied: boolean
  crop_rect?: { x: number; y: number; width: number; height: number }
  crop_rect_pct?: { x: number; y: number; width: number; height: number }
  original_size: { width: number; height: number }
  cropped_size: { width: number; height: number }
  detected_format?: string
  format_confidence?: number
  aspect_ratio?: number
  border_fractions?: { top: number; bottom: number; left: number; right: number }
  skipped?: boolean
  duration_seconds?: number
}
|
||||
|
||||
/**
 * Full record of a single session: image metadata plus the optional result of
 * each step and any parent/sub-session links.
 */
export interface SessionInfo {
  // Identity and source image
  session_id: string
  filename: string
  name?: string
  image_width: number
  image_height: number
  original_image_url: string

  // Progress and classification
  current_step?: number
  document_category?: DocumentCategory
  doc_type?: string

  // Per-step results
  orientation_result?: OrientationResult
  crop_result?: CropResult
  deskew_result?: DeskewResult
  dewarp_result?: DewarpResult
  column_result?: ColumnResult
  row_result?: RowResult
  word_result?: GridResult
  doc_type_result?: DocumentTypeResult

  // Sub-session relations
  sub_sessions?: SubSession[]
  parent_session_id?: string
  box_index?: number
}
|
||||
|
||||
/** Result record of the deskew step: per-method angles, the applied angle and output image URLs. */
export interface DeskewResult {
  session_id: string
  angle_hough: number
  angle_word_alignment: number
  angle_iterative?: number
  angle_residual?: number
  angle_textline?: number
  angle_applied: number
  method_used:
    | 'hough'
    | 'word_alignment'
    | 'manual'
    | 'iterative'
    | 'two_pass'
    | 'three_pass'
    | 'manual_combined'
  confidence: number
  duration_seconds: number
  deskewed_image_url: string
  binarized_image_url: string
}
|
||||
|
||||
/** Ground-truth feedback for a deskew result. */
export interface DeskewGroundTruth {
  is_correct: boolean
  corrected_angle?: number
  notes?: string
}
|
||||
|
||||
/** One dewarp detection: method name, shear in degrees and confidence. */
export interface DewarpDetection {
  method: string
  shear_degrees: number
  confidence: number
}
|
||||
|
||||
/** Result record of the dewarp step, optionally with the individual detections. */
export interface DewarpResult {
  session_id: string
  method_used: string
  shear_degrees: number
  confidence: number
  duration_seconds: number
  dewarped_image_url: string
  detections?: DewarpDetection[]
}
|
||||
|
||||
/** Ground-truth feedback for a dewarp result. */
export interface DewarpGroundTruth {
  is_correct: boolean
  corrected_shear?: number
  notes?: string
}
|
||||
|
||||
/** A typed rectangular region of the page (column kinds, page reference, header, footer). */
export interface PageRegion {
  type:
    | 'column_en'
    | 'column_de'
    | 'column_example'
    | 'page_ref'
    | 'column_marker'
    | 'column_text'
    | 'column_ignore'
    | 'header'
    | 'footer'
  x: number
  y: number
  width: number
  height: number
  classification_confidence?: number
  classification_method?: string
}
|
||||
|
||||
/** A vertical zone of the page, either plain content or a box with its rectangle. */
export interface PageZone {
  zone_type: 'content' | 'box'
  y_start: number
  y_end: number
  box?: { x: number; y: number; width: number; height: number }
}
|
||||
|
||||
/** Result record of column detection: regions, timing and optional zones. */
export interface ColumnResult {
  columns: PageRegion[]
  duration_seconds: number
  zones?: PageZone[]
}
|
||||
|
||||
/** Ground-truth feedback for a column detection result. */
export interface ColumnGroundTruth {
  is_correct: boolean
  corrected_columns?: PageRegion[]
  notes?: string
}
|
||||
|
||||
/** A manually placed column divider. */
export interface ManualColumnDivider {
  /** Position in % of image width (0-100) */
  xPercent: number
}
|
||||
|
||||
/** Alias for the union of region type names accepted by PageRegion. */
export type ColumnTypeKey = PageRegion['type']
|
||||
|
||||
/** Result record of row detection: rows, a numeric summary, row count and timing. */
export interface RowResult {
  rows: RowItem[]
  summary: Record<string, number>
  total_rows: number
  duration_seconds: number
}
|
||||
|
||||
/** One detected row: index, rectangle, word count, row type and gap to the previous row. */
export interface RowItem {
  index: number
  x: number
  y: number
  width: number
  height: number
  word_count: number
  row_type: 'content' | 'header' | 'footer'
  gap_before: number
}
|
||||
|
||||
/** Ground-truth feedback for a row detection result. */
export interface RowGroundTruth {
  is_correct: boolean
  corrected_rows?: RowItem[]
  notes?: string
}
|
||||
|
||||
/** A graphic element found by structure detection, with rectangle, shape and colour. */
export interface StructureGraphic {
  x: number
  y: number
  w: number
  h: number
  area: number
  /** image, illustration */
  shape: string
  color_name: string
  color_hex: string
  confidence: number
}
|
||||
|
||||
/** Result record of structure detection: content bounds, boxes, zones, graphics and word statistics. */
export interface StructureResult {
  image_width: number
  image_height: number
  content_bounds: { x: number; y: number; w: number; h: number }
  boxes: StructureBox[]
  zones: StructureZone[]
  graphics: StructureGraphic[]
  color_pixel_counts: Record<string, number>
  has_words: boolean
  word_count: number
  border_ghosts_removed?: number
  duration_seconds: number
}
|
||||
|
||||
/** A detected box: rectangle, confidence, border thickness and optional background colour. */
export interface StructureBox {
  x: number
  y: number
  w: number
  h: number
  confidence: number
  border_thickness: number
  bg_color_name?: string
  bg_color_hex?: string
}
|
||||
|
||||
/** An indexed structure zone (content or box) with its rectangle. */
export interface StructureZone {
  index: number
  zone_type: 'content' | 'box'
  x: number
  y: number
  w: number
  h: number
}
|
||||
|
||||
/**
 * Bounding box as x / y / w / h.
 * Used for both `bbox_px` and `bbox_pct` in GridCell, so the unit depends on the field.
 */
export interface WordBbox {
  x: number
  y: number
  w: number
  h: number
}
|
||||
|
||||
/** A single recognised word with its pixel bounding box and optional colour info. */
export interface OcrWordBox {
  text: string
  /** absolute image x in px */
  left: number
  /** absolute image y in px */
  top: number
  /** px */
  width: number
  /** px */
  height: number
  conf: number
  /** hex color of detected text, e.g. '#dc2626' */
  color?: string
  /** 'black' | 'red' | 'blue' | 'green' | 'orange' | 'purple' | 'yellow' */
  color_name?: string
  /** true if this word was recovered via color detection */
  recovered?: boolean
}
|
||||
|
||||
/** One cell of the recognition grid: position, text, confidence and bounding boxes. */
export interface GridCell {
  /** e.g. "R03_C1" */
  cell_id: string
  row_index: number
  col_index: number
  col_type: string
  text: string
  confidence: number
  bbox_px: WordBbox
  bbox_pct: WordBbox
  ocr_engine?: string
  is_bold?: boolean
  status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
  /** per-word bounding boxes from OCR engine */
  word_boxes?: OcrWordBox[]
}
|
||||
|
||||
/** Metadata of a column used when building the grid. */
export interface ColumnMeta {
  index: number
  type: string
  x: number
  width: number
}
|
||||
|
||||
/**
 * Result record of word recognition as a cell grid. Vocabulary entries and
 * LLM-review data are attached when present.
 */
export interface GridResult {
  cells: GridCell[]
  grid_shape: { rows: number; cols: number; total_cells: number }
  columns_used: ColumnMeta[]
  layout: 'vocab' | 'generic'
  image_width: number
  image_height: number
  duration_seconds: number
  ocr_engine?: string
  /** Only when layout='vocab' */
  vocab_entries?: WordEntry[]
  /** Backwards compat alias for vocab_entries */
  entries?: WordEntry[]
  entry_count?: number
  summary: {
    total_cells: number
    non_empty_cells: number
    low_confidence: number
    // Only when layout='vocab':
    total_entries?: number
    with_english?: number
    with_german?: number
  }
  llm_review?: {
    changes: { row_index: number; field: string; old: string; new: string }[]
    model_used: string
    duration_ms: number
    entries_corrected: number
    applied_count?: number
    applied_at?: string
  }
}
|
||||
|
||||
/** One vocabulary row: English / German / example text plus per-field bounding boxes. */
export interface WordEntry {
  row_index: number
  english: string
  german: string
  example: string
  source_page?: string
  marker?: string
  confidence: number
  bbox: WordBbox
  bbox_en: WordBbox | null
  bbox_de: WordBbox | null
  bbox_ex: WordBbox | null
  bbox_ref?: WordBbox | null
  bbox_marker?: WordBbox | null
  status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
}
|
||||
|
||||
/**
 * Entry-list form of the word recognition result.
 *
 * @deprecated Use GridResult instead
 */
export interface WordResult {
  entries: WordEntry[]
  entry_count: number
  image_width: number
  image_height: number
  duration_seconds: number
  ocr_engine?: string
  summary: {
    total_entries: number
    with_english: number
    with_german: number
    low_confidence: number
  }
}
|
||||
|
||||
/** Ground-truth feedback for a word recognition result. */
export interface WordGroundTruth {
  is_correct: boolean
  corrected_entries?: WordEntry[]
  notes?: string
}
|
||||
|
||||
/** An image region with its bounding box, prompt / description text, optional base64 image and style. */
export interface ImageRegion {
  bbox_pct: { x: number; y: number; w: number; h: number }
  prompt: string
  description: string
  image_b64: string | null
  style: 'educational' | 'cartoon' | 'sketch' | 'clipart' | 'realistic'
}
|
||||
|
||||
/** Alias for the union of style names accepted by ImageRegion. */
export type ImageStyle = ImageRegion['style']
|
||||
|
||||
/** Display labels for every ImageStyle, in menu order. */
export const IMAGE_STYLES: { value: ImageStyle; label: string }[] = [
  { value: 'educational', label: 'Lehrbuch' },
  { value: 'cartoon',     label: 'Cartoon' },
  { value: 'sketch',      label: 'Skizze' },
  { value: 'clipart',     label: 'Clipart' },
  { value: 'realistic',   label: 'Realistisch' },
]
|
||||
|
||||
/**
 * The pipeline steps in execution order, all initially 'pending'.
 * Array position is the 0-indexed UI step number.
 */
export const PIPELINE_STEPS: PipelineStep[] = [
  { id: 'orientation',    name: 'Orientierung',   icon: '🔄', status: 'pending' },
  { id: 'deskew',         name: 'Begradigung',    icon: '📐', status: 'pending' },
  { id: 'dewarp',         name: 'Entzerrung',     icon: '🔧', status: 'pending' },
  { id: 'crop',           name: 'Zuschneiden',    icon: '✂️', status: 'pending' },
  { id: 'columns',        name: 'Spalten',        icon: '📊', status: 'pending' },
  { id: 'rows',           name: 'Zeilen',         icon: '📏', status: 'pending' },
  { id: 'words',          name: 'Woerter',        icon: '🔤', status: 'pending' },
  { id: 'structure',      name: 'Struktur',       icon: '🔍', status: 'pending' },
  { id: 'llm-review',     name: 'Korrektur',      icon: '✏️', status: 'pending' },
  { id: 'reconstruction', name: 'Rekonstruktion', icon: '🏗️', status: 'pending' },
  { id: 'ground-truth',   name: 'Validierung',    icon: '✅', status: 'pending' },
]
|
||||
@@ -1,675 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import React, { useState, useEffect, useCallback, useRef } from 'react'
|
||||
import { RAG_PDF_MAPPING } from './rag-pdf-mapping'
|
||||
import { REGULATIONS_IN_RAG, REGULATION_INFO } from '../rag-constants'
|
||||
|
||||
/** Props of ChunkBrowserQA. */
interface ChunkBrowserQAProps {
  /** URL that requests are sent to, with query parameters appended. */
  apiProxy: string
}
|
||||
|
||||
/** Keys of the regulation groups shown in the filter sidebar. */
type RegGroupKey = 'eu_regulation' | 'eu_directive' | 'de_law' | 'at_law' | 'ch_law' | 'national_law' | 'bsi_standard' | 'eu_guideline' | 'international_standard' | 'other'
|
||||
|
||||
/** Heading text for each regulation group. */
const GROUP_LABELS: Record<RegGroupKey, string> = {
  // EU-level legislation
  eu_regulation: 'EU Verordnungen',
  eu_directive: 'EU Richtlinien',

  // National legislation
  de_law: 'DE Gesetze',
  at_law: 'AT Gesetze',
  ch_law: 'CH Gesetze',
  national_law: 'Nationale Gesetze (EU)',

  // Standards and guidance
  bsi_standard: 'BSI Standards',
  eu_guideline: 'EDPB / Guidelines',
  international_standard: 'Internationale Standards',

  other: 'Sonstige',
}
|
||||
|
||||
/** Ordered list of all regulation group keys. */
const GROUP_ORDER: RegGroupKey[] = [
  'eu_regulation',
  'eu_directive',
  'de_law',
  'at_law',
  'ch_law',
  'national_law',
  'bsi_standard',
  'eu_guideline',
  'international_standard',
  'other',
]
|
||||
|
||||
/** Names of the selectable collections. */
const COLLECTIONS = [
  'bp_compliance_gesetze', 'bp_compliance_ce', 'bp_compliance_datenschutz',
  'bp_dsfa_corpus', 'bp_compliance_recht',
  'bp_legal_templates', 'bp_nibis_eh',
]
|
||||
|
||||
export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {
|
||||
// Filter-Sidebar
|
||||
const [selectedRegulation, setSelectedRegulation] = useState<string | null>(null)
|
||||
const [regulationCounts, setRegulationCounts] = useState<Record<string, number>>({})
|
||||
const [filterSearch, setFilterSearch] = useState('')
|
||||
const [countsLoading, setCountsLoading] = useState(false)
|
||||
|
||||
// Dokument-Chunks (sequenziell)
|
||||
const [docChunks, setDocChunks] = useState<Record<string, unknown>[]>([])
|
||||
const [docChunkIndex, setDocChunkIndex] = useState(0)
|
||||
const [docTotalChunks, setDocTotalChunks] = useState(0)
|
||||
const [docLoading, setDocLoading] = useState(false)
|
||||
const docChunksRef = useRef(docChunks)
|
||||
docChunksRef.current = docChunks
|
||||
|
||||
// Split-View
|
||||
const [splitViewActive, setSplitViewActive] = useState(true)
|
||||
const [chunksPerPage, setChunksPerPage] = useState(6)
|
||||
const [fullscreen, setFullscreen] = useState(false)
|
||||
|
||||
// Collection — default to bp_compliance_ce where we have PDFs downloaded
|
||||
const [collection, setCollection] = useState('bp_compliance_ce')
|
||||
|
||||
// PDF existence check
|
||||
const [pdfExists, setPdfExists] = useState<boolean | null>(null)
|
||||
|
||||
// Sidebar collapsed groups
|
||||
const [collapsedGroups, setCollapsedGroups] = useState<Set<string>>(new Set())
|
||||
|
||||
// Build grouped regulations for sidebar
|
||||
const regulationsInCollection = Object.entries(REGULATIONS_IN_RAG)
|
||||
.filter(([, info]) => info.collection === collection)
|
||||
.map(([code]) => code)
|
||||
|
||||
// Buckets every regulation code of the active collection by its regulation type.
// Codes without a REGULATION_INFO entry, or with a type that has no bucket, land in `other`.
const groupedRegulations = React.useMemo(() => {
  const buckets: Record<RegGroupKey, { code: string; name: string; type: string }[]> = {
    eu_regulation: [],
    eu_directive: [],
    de_law: [],
    at_law: [],
    ch_law: [],
    national_law: [],
    bsi_standard: [],
    eu_guideline: [],
    international_standard: [],
    other: [],
  }

  regulationsInCollection.forEach(code => {
    const info = REGULATION_INFO.find(entry => entry.code === code)
    const declaredType = (info?.type || 'other') as RegGroupKey
    const bucket = declaredType in buckets ? buckets[declaredType] : buckets.other
    bucket.push({
      code,
      name: info?.name || code,
      type: info?.type || 'unknown',
    })
  })

  return buckets
  // The joined string stands in for the array, which is rebuilt on every render.
}, [regulationsInCollection.join(',')])
|
||||
|
||||
// Load regulation counts for current collection
|
||||
// Fetches chunk counts for all regulations of collection `col` in one batch request
// and merges them into `regulationCounts`, keyed by our own regulation code.
const loadRegulationCounts = useCallback(async (col: string) => {
  // qdrant_id -> our codes; several codes may share one qdrant_id.
  const codesByQdrantId: Record<string, string[]> = {}
  let matched = 0
  for (const [code, info] of Object.entries(REGULATIONS_IN_RAG)) {
    if (info.collection !== col || !info.qdrant_id) continue
    matched += 1
    const bucket = codesByQdrantId[info.qdrant_id] || (codesByQdrantId[info.qdrant_id] = [])
    bucket.push(code)
  }
  if (matched === 0) return

  setCountsLoading(true)
  try {
    const query = new URLSearchParams({
      action: 'regulation-counts-batch',
      collection: col,
      qdrant_ids: Object.keys(codesByQdrantId).join(','),
    })
    const response = await fetch(`${apiProxy}?${query}`)
    // Non-OK responses are ignored; the `finally` below still clears the loading flag.
    if (!response.ok) return

    const payload = await response.json()
    const countsByCode: Record<string, number> = {}
    Object.entries(payload.counts as Record<string, number>).forEach(([qdrantId, count]) => {
      const codes = codesByQdrantId[qdrantId] || []
      codes.forEach(code => {
        countsByCode[code] = count
      })
    })
    setRegulationCounts(previous => ({ ...previous, ...countsByCode }))
  } catch (error) {
    console.error('Failed to load regulation counts:', error)
  } finally {
    setCountsLoading(false)
  }
}, [apiProxy])
|
||||
|
||||
// Load all chunks for a regulation (paginated scroll)
|
||||
// Token of the most recent chunk load. Every call to loadDocumentChunks bumps it, so an
// older, slower request can detect that it has been superseded and skip its state updates.
const docLoadTokenRef = useRef(0)

/**
 * Loads all chunks of one regulation by scrolling the proxy endpoint in pages of 100,
 * sorts them by `chunk_index` (falling back to `chunk_id`) and stores them in state.
 *
 * - The previous document is always cleared first, also when the regulation has no
 *   `qdrant_id`, so stale chunks are never shown under a newly selected regulation.
 * - If another load starts while this one is in flight, this one stops without touching state.
 * - A non-OK page ends the scroll early; chunks fetched so far are still shown (best effort).
 *
 * @param regulationCode key into REGULATIONS_IN_RAG
 */
const loadDocumentChunks = useCallback(async (regulationCode: string) => {
  const token = ++docLoadTokenRef.current
  const isStale = () => token !== docLoadTokenRef.current

  setDocChunks([])
  setDocChunkIndex(0)
  setDocTotalChunks(0)

  const ragInfo = REGULATIONS_IN_RAG[regulationCode]
  if (!ragInfo || !ragInfo.qdrant_id) {
    // A superseded load no longer resets the flag itself, so clear it here.
    setDocLoading(false)
    return
  }

  setDocLoading(true)

  // Non-numeric ids would yield NaN and make the comparator inconsistent; treat them as 0.
  const sortKey = (chunk: Record<string, unknown>): number => {
    const value = Number(chunk.chunk_index ?? chunk.chunk_id ?? 0)
    return Number.isFinite(value) ? value : 0
  }

  const allChunks: Record<string, unknown>[] = []
  let offset: string | null = null

  try {
    // Hard upper bound on the number of pages in case the endpoint keeps returning offsets.
    let safety = 0
    do {
      const params = new URLSearchParams({
        action: 'scroll',
        collection: ragInfo.collection,
        limit: '100',
        filter_key: 'regulation_id',
        filter_value: ragInfo.qdrant_id,
      })
      if (offset) params.append('offset', offset)

      const res = await fetch(`${apiProxy}?${params}`)
      if (isStale()) return
      if (!res.ok) {
        console.warn(`Chunk scroll for ${regulationCode} stopped early: HTTP ${res.status}`)
        break
      }

      const data = await res.json()
      if (isStale()) return
      const chunks = data.chunks || []
      allChunks.push(...chunks)
      offset = data.next_offset || null
      safety++
    } while (offset && safety < 200)

    allChunks.sort((a, b) => sortKey(a) - sortKey(b))

    setDocChunks(allChunks)
    setDocTotalChunks(allChunks.length)
    setDocChunkIndex(0)
  } catch (error) {
    console.error('Failed to load document chunks:', error)
  } finally {
    // Only the newest load may clear the spinner.
    if (!isStale()) setDocLoading(false)
  }
}, [apiProxy])
|
||||
|
||||
// Initial load
|
||||
useEffect(() => {
|
||||
loadRegulationCounts(collection)
|
||||
}, [collection, loadRegulationCounts])
|
||||
|
||||
// Current chunk
|
||||
const currentChunk = docChunks[docChunkIndex] || null
|
||||
const prevChunk = docChunkIndex > 0 ? docChunks[docChunkIndex - 1] : null
|
||||
const nextChunk = docChunkIndex < docChunks.length - 1 ? docChunks[docChunkIndex + 1] : null
|
||||
|
||||
// PDF page estimation — use pages metadata if available
|
||||
/**
 * Returns the 1-based PDF page to show for a chunk.
 *
 * Order of preference: first entry of the chunk's `pages` array, then its `page` field,
 * then an estimate from the chunk position and the chunks-per-page setting (the
 * per-document value from RAG_PDF_MAPPING wins over the value chosen in the UI).
 * Metadata values that are not positive finite numbers are ignored, so the result can
 * always be used in a `#page=` URL fragment.
 *
 * @param chunk    chunk payload, or null when no chunk is selected
 * @param chunkIdx 0-based position of the chunk within the document
 */
const estimatePdfPage = (chunk: Record<string, unknown> | null, chunkIdx: number): number => {
  // Accepts numbers and numeric strings; yields null for anything unusable as a page.
  const toPage = (value: unknown): number | null => {
    const n = typeof value === 'string' && value.trim() !== '' ? Number(value) : value
    return typeof n === 'number' && Number.isFinite(n) && n > 0 ? n : null
  }

  if (chunk) {
    // Try pages array from payload (e.g. [7] or [7,8])
    const pages = chunk.pages
    const fromPages = Array.isArray(pages) && pages.length > 0 ? toPage(pages[0]) : null
    if (fromPages !== null) return fromPages
    // Try page field
    const fromPage = toPage(chunk.page)
    if (fromPage !== null) return fromPage
  }

  const mapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null
  const cpp = mapping?.chunksPerPage || chunksPerPage
  // Clamp so a transiently negative index can never produce page 0.
  return Math.floor(Math.max(0, chunkIdx) / cpp) + 1
}
|
||||
|
||||
const pdfPage = estimatePdfPage(currentChunk, docChunkIndex)
|
||||
const pdfMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null
|
||||
const pdfUrl = pdfMapping ? `/rag-originals/${pdfMapping.filename}#page=${pdfPage}` : null
|
||||
|
||||
// Check PDF existence when regulation changes
|
||||
// Checks via a HEAD request whether the mapped PDF of the selected regulation is served.
// pdfExists: null = nothing selected or check in progress, true/false = check result.
useEffect(() => {
  if (!selectedRegulation) { setPdfExists(null); return }
  const mapping = RAG_PDF_MAPPING[selectedRegulation]
  if (!mapping) { setPdfExists(false); return }

  // Drop the previous regulation's result while the new check is running.
  setPdfExists(null)

  // Set by the cleanup when the selection changes or the component unmounts,
  // so a late response for an older selection cannot overwrite the current one.
  let cancelled = false
  const url = `/rag-originals/${mapping.filename}`
  fetch(url, { method: 'HEAD' })
    .then(res => { if (!cancelled) setPdfExists(res.ok) })
    .catch(() => { if (!cancelled) setPdfExists(false) })

  return () => { cancelled = true }
}, [selectedRegulation])
|
||||
|
||||
// Handlers
|
||||
const handleSelectRegulation = (code: string) => {
|
||||
setSelectedRegulation(code)
|
||||
loadDocumentChunks(code)
|
||||
}
|
||||
|
||||
const handleCollectionChange = (col: string) => {
|
||||
setCollection(col)
|
||||
setSelectedRegulation(null)
|
||||
setDocChunks([])
|
||||
setDocChunkIndex(0)
|
||||
setDocTotalChunks(0)
|
||||
setRegulationCounts({})
|
||||
}
|
||||
|
||||
// Steps back one chunk. The lower bound is enforced inside the updater, so it is applied
// to the latest index rather than the one captured at render time.
const handlePrev = () => {
  setDocChunkIndex(i => Math.max(0, i - 1))
}
|
||||
|
||||
// Steps forward one chunk. The upper bound is enforced inside the updater, so it is applied
// to the latest index; with no chunks loaded the index stays at 0.
const handleNext = () => {
  const lastIndex = Math.max(0, docChunks.length - 1)
  setDocChunkIndex(i => Math.min(lastIndex, i + 1))
}
|
||||
|
||||
/**
 * Window-level keyboard shortcuts:
 * - Escape leaves fullscreen.
 * - ArrowLeft / ArrowUp go to the previous chunk, ArrowRight / ArrowDown to the next.
 *
 * Arrow keys are left alone while the user is in a form control or editable element
 * (search box, jump-to input, dropdowns) and when a modifier key is held, so caret
 * movement, option selection and browser shortcuts keep working.
 */
const handleKeyDown = useCallback((e: KeyboardEvent) => {
  if (e.key === 'Escape' && fullscreen) {
    e.preventDefault()
    setFullscreen(false)
    return
  }

  if (e.altKey || e.ctrlKey || e.metaKey) return
  const target = e.target as HTMLElement | null
  const tag = target?.tagName
  if (tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT' || target?.isContentEditable) return

  if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') {
    e.preventDefault()
    setDocChunkIndex(i => Math.max(0, i - 1))
  } else if (e.key === 'ArrowRight' || e.key === 'ArrowDown') {
    e.preventDefault()
    // The ref gives the current chunk list without re-creating this callback.
    // Outer Math.max keeps the index at 0 when no chunks are loaded.
    setDocChunkIndex(i => Math.max(0, Math.min(docChunksRef.current.length - 1, i + 1)))
  }
}, [fullscreen])
|
||||
|
||||
// Registers the keyboard shortcuts while fullscreen is on or a document with chunks is open.
useEffect(() => {
  const hasDocument = Boolean(selectedRegulation) && docChunks.length > 0
  if (!fullscreen && !hasDocument) return

  window.addEventListener('keydown', handleKeyDown)
  return () => {
    window.removeEventListener('keydown', handleKeyDown)
  }
}, [selectedRegulation, docChunks.length, handleKeyDown, fullscreen])
|
||||
|
||||
// Collapses an expanded sidebar group or expands a collapsed one.
const toggleGroup = (group: string) => {
  setCollapsedGroups(current => {
    // Copy first: state must not be mutated in place.
    const updated = new Set(current)
    // Set.delete reports whether the entry existed; add only when nothing was removed.
    if (!updated.delete(group)) updated.add(group)
    return updated
  })
}
|
||||
|
||||
// Get text content from a chunk
|
||||
// Returns the chunk's text as a string: the first truthy value of `chunk_text`, `text`,
// `content` (in that order), or '' when the chunk is null or none of them is set.
const getChunkText = (chunk: Record<string, unknown> | null): string => {
  const raw = chunk ? chunk.chunk_text || chunk.text || chunk.content : ''
  return String(raw || '')
}
|
||||
|
||||
// Extract structural metadata for prominent display
|
||||
// Picks the structural metadata shown as badges from a chunk payload:
//   article - first truthy of article / artikel / paragraph / section_title
//   section - first truthy of section / chapter / abschnitt / kapitel
//   pages   - "S. n" or "S. first-last" from the `pages` array, else "S. <page>"
// Fields without a value are omitted; a null chunk yields an empty object.
const getStructuralInfo = (chunk: Record<string, unknown> | null): { article?: string; section?: string; pages?: string } => {
  const info: { article?: string; section?: string; pages?: string } = {}
  if (!chunk) return info

  const heading = chunk.article || chunk.artikel || chunk.paragraph || chunk.section_title
  if (heading) info.article = String(heading)

  const division = chunk.section || chunk.chapter || chunk.abschnitt || chunk.kapitel
  if (division) info.section = String(division)

  const pageList = chunk.pages as number[] | undefined
  if (Array.isArray(pageList) && pageList.length > 0) {
    const first = pageList[0]
    const last = pageList[pageList.length - 1]
    info.pages = pageList.length === 1 ? `S. ${first}` : `S. ${first}-${last}`
  } else if (chunk.page) {
    info.pages = `S. ${chunk.page}`
  }

  return info
}
|
||||
|
||||
// Overlap extraction
|
||||
// Tail of the previous chunk for the overlap preview: its last 150 characters prefixed
// with '...', the full text when it is 150 characters or shorter, '' without a previous chunk.
const getOverlapPrev = (): string => {
  const previousText = prevChunk ? getChunkText(prevChunk) : ''
  if (previousText.length <= 150) return previousText
  return '...' + previousText.slice(-150)
}
|
||||
|
||||
// Head of the next chunk for the overlap preview: its first 150 characters followed
// by '...', the full text when it is 150 characters or shorter, '' without a next chunk.
const getOverlapNext = (): string => {
  const followingText = nextChunk ? getChunkText(nextChunk) : ''
  if (followingText.length <= 150) return followingText
  return `${followingText.slice(0, 150)}...`
}
|
||||
|
||||
// Filter sidebar items
|
||||
// Sidebar entries narrowed by the search box: a case-insensitive substring match on the
// regulation code or name. The search text is trimmed before matching, so leading or
// trailing spaces do not hide every entry. A blank search returns all groups unchanged.
const filteredRegulations = React.useMemo(() => {
  const term = filterSearch.trim().toLowerCase()
  if (!term) return groupedRegulations

  const filtered: typeof groupedRegulations = {
    eu_regulation: [], eu_directive: [], de_law: [], at_law: [], ch_law: [],
    national_law: [], bsi_standard: [], eu_guideline: [], international_standard: [], other: [],
  }
  for (const [group, items] of Object.entries(groupedRegulations)) {
    filtered[group as RegGroupKey] = items.filter(
      r => r.code.toLowerCase().includes(term) || r.name.toLowerCase().includes(term)
    )
  }
  return filtered
}, [groupedRegulations, filterSearch])
|
||||
|
||||
// Regulation name lookup
|
||||
// Display name for a regulation code; falls back to the code itself when the code is
// unknown or its REGULATION_INFO entry has no name.
const getRegName = (code: string): string => {
  for (const entry of REGULATION_INFO) {
    if (entry.code === code) return entry.name || code
  }
  return code
}
|
||||
|
||||
// Important metadata keys to show prominently
|
||||
const STRUCTURAL_KEYS = new Set([
|
||||
'article', 'artikel', 'paragraph', 'section_title', 'section', 'chapter',
|
||||
'abschnitt', 'kapitel', 'pages', 'page',
|
||||
])
|
||||
const HIDDEN_KEYS = new Set([
|
||||
'text', 'content', 'chunk_text', 'id', 'embedding',
|
||||
])
|
||||
|
||||
const structInfo = getStructuralInfo(currentChunk)
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`flex flex-col ${fullscreen ? 'fixed inset-0 z-50 bg-slate-100 p-4' : ''}`}
|
||||
style={fullscreen ? { height: '100vh' } : { height: 'calc(100vh - 220px)' }}
|
||||
>
|
||||
{/* Header bar — fixed height */}
|
||||
<div className="flex-shrink-0 bg-white rounded-xl border border-slate-200 p-3 mb-3">
|
||||
<div className="flex flex-wrap items-center gap-4">
|
||||
<div>
|
||||
<label className="block text-xs font-medium text-slate-500 mb-1">Collection</label>
|
||||
<select
|
||||
value={collection}
|
||||
onChange={(e) => handleCollectionChange(e.target.value)}
|
||||
className="px-3 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
|
||||
>
|
||||
{COLLECTIONS.map(c => (
|
||||
<option key={c} value={c}>{c}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
{selectedRegulation && (
|
||||
<>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-semibold text-slate-900">
|
||||
{selectedRegulation} — {getRegName(selectedRegulation)}
|
||||
</span>
|
||||
{structInfo.article && (
|
||||
<span className="px-2 py-0.5 bg-blue-100 text-blue-800 text-xs font-medium rounded">
|
||||
{structInfo.article}
|
||||
</span>
|
||||
)}
|
||||
{structInfo.pages && (
|
||||
<span className="px-2 py-0.5 bg-slate-100 text-slate-600 text-xs rounded">
|
||||
{structInfo.pages}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center gap-2 ml-auto">
|
||||
<button
|
||||
onClick={handlePrev}
|
||||
disabled={docChunkIndex === 0}
|
||||
className="px-3 py-1.5 text-sm font-medium border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
>
|
||||
◀ Zurueck
|
||||
</button>
|
||||
<span className="text-sm font-mono text-slate-600 min-w-[80px] text-center">
|
||||
{docChunkIndex + 1} / {docTotalChunks}
|
||||
</span>
|
||||
<button
|
||||
onClick={handleNext}
|
||||
disabled={docChunkIndex >= docChunks.length - 1}
|
||||
className="px-3 py-1.5 text-sm font-medium border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
>
|
||||
Weiter ▶
|
||||
</button>
|
||||
<input
|
||||
type="number"
|
||||
min={1}
|
||||
max={docTotalChunks}
|
||||
value={docChunkIndex + 1}
|
||||
onChange={(e) => {
|
||||
const v = parseInt(e.target.value, 10)
|
||||
if (!isNaN(v) && v >= 1 && v <= docTotalChunks) setDocChunkIndex(v - 1)
|
||||
}}
|
||||
className="w-16 px-2 py-1 border rounded text-xs text-center"
|
||||
title="Springe zu Chunk Nr."
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<label className="text-xs text-slate-500">Chunks/Seite:</label>
|
||||
<select
|
||||
value={chunksPerPage}
|
||||
onChange={(e) => setChunksPerPage(Number(e.target.value))}
|
||||
className="px-2 py-1 border rounded text-xs"
|
||||
>
|
||||
{[3, 4, 5, 6, 8, 10, 12, 15, 20].map(n => (
|
||||
<option key={n} value={n}>{n}</option>
|
||||
))}
|
||||
</select>
|
||||
<button
|
||||
onClick={() => setSplitViewActive(!splitViewActive)}
|
||||
className={`px-3 py-1 text-xs rounded-lg border ${
|
||||
splitViewActive ? 'bg-teal-50 border-teal-300 text-teal-700' : 'bg-slate-50 border-slate-300 text-slate-600'
|
||||
}`}
|
||||
>
|
||||
{splitViewActive ? 'Split-View an' : 'Split-View aus'}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setFullscreen(!fullscreen)}
|
||||
className={`px-3 py-1 text-xs rounded-lg border ${
|
||||
fullscreen ? 'bg-indigo-50 border-indigo-300 text-indigo-700' : 'bg-slate-50 border-slate-300 text-slate-600'
|
||||
}`}
|
||||
title={fullscreen ? 'Vollbild beenden (Esc)' : 'Vollbild'}
|
||||
>
|
||||
{fullscreen ? '✕ Vollbild beenden' : '⛶ Vollbild'}
|
||||
</button>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Main content: Sidebar + Content — fills remaining height */}
|
||||
<div className="flex gap-3 flex-1 min-h-0">
|
||||
{/* Sidebar — scrollable */}
|
||||
<div className="w-56 flex-shrink-0 bg-white rounded-xl border border-slate-200 flex flex-col min-h-0">
|
||||
<div className="flex-shrink-0 p-3 border-b border-slate-100">
|
||||
<input
|
||||
type="text"
|
||||
value={filterSearch}
|
||||
onChange={(e) => setFilterSearch(e.target.value)}
|
||||
placeholder="Suche..."
|
||||
className="w-full px-2 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
|
||||
/>
|
||||
{countsLoading && (
|
||||
<div className="text-xs text-slate-400 mt-1 animate-pulse">Counts laden...</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex-1 overflow-y-auto min-h-0">
|
||||
{GROUP_ORDER.map(group => {
|
||||
const items = filteredRegulations[group]
|
||||
if (items.length === 0) return null
|
||||
const isCollapsed = collapsedGroups.has(group)
|
||||
return (
|
||||
<div key={group}>
|
||||
<button
|
||||
onClick={() => toggleGroup(group)}
|
||||
className="w-full px-3 py-1.5 text-left text-xs font-semibold text-slate-500 bg-slate-50 hover:bg-slate-100 flex items-center justify-between sticky top-0 z-10"
|
||||
>
|
||||
<span>{GROUP_LABELS[group]}</span>
|
||||
<span className="text-slate-400">{isCollapsed ? '+' : '-'}</span>
|
||||
</button>
|
||||
{!isCollapsed && items.map(reg => {
|
||||
const count = regulationCounts[reg.code] ?? 0
|
||||
const isSelected = selectedRegulation === reg.code
|
||||
return (
|
||||
<button
|
||||
key={reg.code}
|
||||
onClick={() => handleSelectRegulation(reg.code)}
|
||||
className={`w-full px-3 py-1.5 text-left text-sm flex items-center justify-between hover:bg-teal-50 transition-colors ${
|
||||
isSelected ? 'bg-teal-100 text-teal-900 font-medium' : 'text-slate-700'
|
||||
}`}
|
||||
>
|
||||
<span className="truncate text-xs">{reg.name || reg.code}</span>
|
||||
<span className={`text-xs tabular-nums flex-shrink-0 ml-1 ${count > 0 ? 'text-slate-500' : 'text-slate-300'}`}>
|
||||
{count > 0 ? count.toLocaleString() : '—'}
|
||||
</span>
|
||||
</button>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Content area — fills remaining width and height */}
|
||||
{!selectedRegulation ? (
|
||||
<div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
|
||||
<div className="text-center text-slate-400 space-y-2">
|
||||
<div className="text-4xl">🔍</div>
|
||||
<p className="text-sm">Dokument in der Sidebar auswaehlen, um QA zu starten.</p>
|
||||
<p className="text-xs text-slate-300">Pfeiltasten: Chunk vor/zurueck</p>
|
||||
</div>
|
||||
</div>
|
||||
) : docLoading ? (
|
||||
<div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
|
||||
<div className="text-center text-slate-500 space-y-2">
|
||||
<div className="animate-spin text-3xl">⚙</div>
|
||||
<p className="text-sm">Chunks werden geladen...</p>
|
||||
<p className="text-xs text-slate-400">
|
||||
{selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks.toLocaleString() || '?'} Chunks erwartet
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className={`flex-1 grid gap-3 min-h-0 ${splitViewActive ? 'grid-cols-2' : 'grid-cols-1'}`}>
|
||||
{/* Chunk-Text Panel — fixed height, internal scroll */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 flex flex-col min-h-0 overflow-hidden">
|
||||
{/* Panel header */}
|
||||
<div className="flex-shrink-0 px-4 py-2 bg-slate-50 border-b border-slate-100 flex items-center justify-between">
|
||||
<span className="text-sm font-medium text-slate-700">Chunk-Text</span>
|
||||
<div className="flex items-center gap-2">
|
||||
{structInfo.article && (
|
||||
<span className="px-2 py-0.5 bg-blue-50 text-blue-700 text-xs font-medium rounded border border-blue-200">
|
||||
{structInfo.article}
|
||||
</span>
|
||||
)}
|
||||
{structInfo.section && (
|
||||
<span className="px-2 py-0.5 bg-purple-50 text-purple-700 text-xs rounded border border-purple-200">
|
||||
{structInfo.section}
|
||||
</span>
|
||||
)}
|
||||
<span className="text-xs text-slate-400 tabular-nums">
|
||||
#{docChunkIndex} / {docTotalChunks - 1}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Scrollable content */}
|
||||
<div className="flex-1 overflow-y-auto min-h-0 p-4 space-y-3">
|
||||
{/* Overlap from previous chunk */}
|
||||
{prevChunk && (
|
||||
<div className="text-xs text-slate-400 bg-amber-50 border-l-2 border-amber-300 px-3 py-2 rounded-r">
|
||||
<div className="font-medium text-amber-600 mb-1">↑ Ende vorheriger Chunk #{docChunkIndex - 1}</div>
|
||||
<p className="whitespace-pre-wrap break-words leading-relaxed">{getOverlapPrev()}</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Current chunk text */}
|
||||
{currentChunk ? (
|
||||
<div className="text-sm text-slate-800 whitespace-pre-wrap break-words leading-relaxed border-l-2 border-teal-400 pl-3">
|
||||
{getChunkText(currentChunk)}
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-sm text-slate-400 italic">Kein Chunk-Text vorhanden.</div>
|
||||
)}
|
||||
|
||||
{/* Overlap from next chunk */}
|
||||
{nextChunk && (
|
||||
<div className="text-xs text-slate-400 bg-amber-50 border-l-2 border-amber-300 px-3 py-2 rounded-r">
|
||||
<div className="font-medium text-amber-600 mb-1">↓ Anfang naechster Chunk #{docChunkIndex + 1}</div>
|
||||
<p className="whitespace-pre-wrap break-words leading-relaxed">{getOverlapNext()}</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Metadata */}
|
||||
{currentChunk && (
|
||||
<div className="mt-4 pt-3 border-t border-slate-100">
|
||||
<div className="text-xs font-medium text-slate-500 mb-2">Metadaten</div>
|
||||
<div className="grid grid-cols-2 gap-x-4 gap-y-1 text-xs">
|
||||
{Object.entries(currentChunk)
|
||||
.filter(([k]) => !HIDDEN_KEYS.has(k))
|
||||
.sort(([a], [b]) => {
|
||||
// Structural keys first
|
||||
const aStruct = STRUCTURAL_KEYS.has(a) ? 0 : 1
|
||||
const bStruct = STRUCTURAL_KEYS.has(b) ? 0 : 1
|
||||
return aStruct - bStruct || a.localeCompare(b)
|
||||
})
|
||||
.map(([k, v]) => (
|
||||
<div key={k} className={`flex gap-1 ${STRUCTURAL_KEYS.has(k) ? 'col-span-2 font-medium' : ''}`}>
|
||||
<span className="font-medium text-slate-500 flex-shrink-0">{k}:</span>
|
||||
<span className="text-slate-700 break-all">
|
||||
{Array.isArray(v) ? v.join(', ') : String(v)}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
{/* Chunk quality indicator */}
|
||||
<div className="mt-3 pt-2 border-t border-slate-50">
|
||||
<div className="text-xs text-slate-400">
|
||||
Chunk-Laenge: {getChunkText(currentChunk).length} Zeichen
|
||||
{getChunkText(currentChunk).length < 50 && (
|
||||
<span className="ml-2 text-orange-500 font-medium">⚠ Sehr kurz</span>
|
||||
)}
|
||||
{getChunkText(currentChunk).length > 2000 && (
|
||||
<span className="ml-2 text-orange-500 font-medium">⚠ Sehr lang</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* PDF-Viewer Panel */}
|
||||
{splitViewActive && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 flex flex-col min-h-0 overflow-hidden">
|
||||
<div className="flex-shrink-0 px-4 py-2 bg-slate-50 border-b border-slate-100 flex items-center justify-between">
|
||||
<span className="text-sm font-medium text-slate-700">Original-PDF</span>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-xs text-slate-400">
|
||||
Seite ~{pdfPage}
|
||||
{pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''}
|
||||
</span>
|
||||
{pdfUrl && (
|
||||
<a
|
||||
href={pdfUrl.split('#')[0]}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-xs text-teal-600 hover:text-teal-800 underline"
|
||||
>
|
||||
Oeffnen ↗
|
||||
</a>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex-1 min-h-0 relative">
|
||||
{pdfUrl && pdfExists ? (
|
||||
<iframe
|
||||
key={`${selectedRegulation}-${pdfPage}`}
|
||||
src={pdfUrl}
|
||||
className="absolute inset-0 w-full h-full border-0"
|
||||
title="Original PDF"
|
||||
/>
|
||||
) : (
|
||||
<div className="flex items-center justify-center h-full text-slate-400 text-sm p-4">
|
||||
<div className="text-center space-y-2">
|
||||
<div className="text-3xl">📄</div>
|
||||
{!pdfMapping ? (
|
||||
<>
|
||||
<p>Kein PDF-Mapping fuer {selectedRegulation}.</p>
|
||||
<p className="text-xs">rag-pdf-mapping.ts ergaenzen.</p>
|
||||
</>
|
||||
) : pdfExists === false ? (
|
||||
<>
|
||||
<p className="font-medium text-orange-600">PDF nicht vorhanden</p>
|
||||
<p className="text-xs">Datei <code className="bg-slate-100 px-1 rounded">{pdfMapping.filename}</code> fehlt in ~/rag-originals/</p>
|
||||
<p className="text-xs mt-1">Bitte manuell herunterladen und dort ablegen.</p>
|
||||
</>
|
||||
) : (
|
||||
<p>PDF wird geprueft...</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,126 +0,0 @@
|
||||
/** Describes the original PDF that belongs to one regulation code. */
export interface RagPdfMapping {
  /** File name of the PDF (the entries below use names such as 'GDPR_DE.pdf'). */
  filename: string;
  /** Language code of the PDF, e.g. 'de' or 'en' (presumably ISO 639-1; confirm). */
  language: string;
  /** Total number of pages of the PDF, when known. */
  totalPages?: number;
  /** Per-document chunks-per-page value, when known. */
  chunksPerPage?: number;
}
|
||||
|
||||
export const RAG_PDF_MAPPING: Record<string, RagPdfMapping> = {
|
||||
// EU Verordnungen
|
||||
GDPR: { filename: 'GDPR_DE.pdf', language: 'de', totalPages: 88 },
|
||||
EPRIVACY: { filename: 'EPRIVACY_DE.pdf', language: 'de' },
|
||||
SCC: { filename: 'SCC_DE.pdf', language: 'de' },
|
||||
SCC_FULL_TEXT: { filename: 'SCC_FULL_TEXT_DE.pdf', language: 'de' },
|
||||
AIACT: { filename: 'AIACT_DE.pdf', language: 'de', totalPages: 144 },
|
||||
CRA: { filename: 'CRA_DE.pdf', language: 'de' },
|
||||
NIS2: { filename: 'NIS2_DE.pdf', language: 'de' },
|
||||
DGA: { filename: 'DGA_DE.pdf', language: 'de' },
|
||||
DSA: { filename: 'DSA_DE.pdf', language: 'de' },
|
||||
PLD: { filename: 'PLD_DE.pdf', language: 'de' },
|
||||
E_COMMERCE_RL: { filename: 'E_COMMERCE_RL_DE.pdf', language: 'de' },
|
||||
VERBRAUCHERRECHTE_RL: { filename: 'VERBRAUCHERRECHTE_RL_DE.pdf', language: 'de' },
|
||||
DIGITALE_INHALTE_RL: { filename: 'DIGITALE_INHALTE_RL_DE.pdf', language: 'de' },
|
||||
DMA: { filename: 'DMA_DE.pdf', language: 'de' },
|
||||
DPF: { filename: 'DPF_DE.pdf', language: 'de' },
|
||||
EUCSA: { filename: 'EUCSA_DE.pdf', language: 'de' },
|
||||
DATAACT: { filename: 'DATAACT_DE.pdf', language: 'de' },
|
||||
DORA: { filename: 'DORA_DE.pdf', language: 'de' },
|
||||
PSD2: { filename: 'PSD2_DE.pdf', language: 'de' },
|
||||
AMLR: { filename: 'AMLR_DE.pdf', language: 'de' },
|
||||
MiCA: { filename: 'MiCA_DE.pdf', language: 'de' },
|
||||
EHDS: { filename: 'EHDS_DE.pdf', language: 'de' },
|
||||
EAA: { filename: 'EAA_DE.pdf', language: 'de' },
|
||||
DSM: { filename: 'DSM_DE.pdf', language: 'de' },
|
||||
GPSR: { filename: 'GPSR_DE.pdf', language: 'de' },
|
||||
MACHINERY_REG: { filename: 'MACHINERY_REG_DE.pdf', language: 'de' },
|
||||
BLUE_GUIDE: { filename: 'BLUE_GUIDE_DE.pdf', language: 'de' },
|
||||
// DE Gesetze
|
||||
TDDDG: { filename: 'TDDDG_DE.pdf', language: 'de' },
|
||||
BDSG_FULL: { filename: 'BDSG_FULL_DE.pdf', language: 'de' },
|
||||
DE_DDG: { filename: 'DE_DDG.pdf', language: 'de' },
|
||||
DE_BGB_AGB: { filename: 'DE_BGB_AGB.pdf', language: 'de' },
|
||||
DE_EGBGB: { filename: 'DE_EGBGB.pdf', language: 'de' },
|
||||
DE_HGB_RET: { filename: 'DE_HGB_RET.pdf', language: 'de' },
|
||||
DE_AO_RET: { filename: 'DE_AO_RET.pdf', language: 'de' },
|
||||
DE_UWG: { filename: 'DE_UWG.pdf', language: 'de' },
|
||||
DE_TKG: { filename: 'DE_TKG.pdf', language: 'de' },
|
||||
DE_PANGV: { filename: 'DE_PANGV.pdf', language: 'de' },
|
||||
DE_DLINFOV: { filename: 'DE_DLINFOV.pdf', language: 'de' },
|
||||
DE_BETRVG: { filename: 'DE_BETRVG.pdf', language: 'de' },
|
||||
DE_GESCHGEHG: { filename: 'DE_GESCHGEHG.pdf', language: 'de' },
|
||||
DE_BSIG: { filename: 'DE_BSIG.pdf', language: 'de' },
|
||||
DE_USTG_RET: { filename: 'DE_USTG_RET.pdf', language: 'de' },
|
||||
// BSI Standards
|
||||
'BSI-TR-03161-1': { filename: 'BSI-TR-03161-1.pdf', language: 'de' },
|
||||
'BSI-TR-03161-2': { filename: 'BSI-TR-03161-2.pdf', language: 'de' },
|
||||
'BSI-TR-03161-3': { filename: 'BSI-TR-03161-3.pdf', language: 'de' },
|
||||
// AT Gesetze
|
||||
AT_DSG: { filename: 'AT_DSG.pdf', language: 'de' },
|
||||
AT_DSG_FULL: { filename: 'AT_DSG_FULL.pdf', language: 'de' },
|
||||
AT_ECG: { filename: 'AT_ECG.pdf', language: 'de' },
|
||||
AT_TKG: { filename: 'AT_TKG.pdf', language: 'de' },
|
||||
AT_KSCHG: { filename: 'AT_KSCHG.pdf', language: 'de' },
|
||||
AT_FAGG: { filename: 'AT_FAGG.pdf', language: 'de' },
|
||||
AT_UGB_RET: { filename: 'AT_UGB_RET.pdf', language: 'de' },
|
||||
AT_BAO_RET: { filename: 'AT_BAO_RET.pdf', language: 'de' },
|
||||
AT_MEDIENG: { filename: 'AT_MEDIENG.pdf', language: 'de' },
|
||||
AT_ABGB_AGB: { filename: 'AT_ABGB_AGB.pdf', language: 'de' },
|
||||
AT_UWG: { filename: 'AT_UWG.pdf', language: 'de' },
|
||||
// CH Gesetze
|
||||
CH_DSG: { filename: 'CH_DSG.pdf', language: 'de' },
|
||||
CH_DSV: { filename: 'CH_DSV.pdf', language: 'de' },
|
||||
CH_OR_AGB: { filename: 'CH_OR_AGB.pdf', language: 'de' },
|
||||
CH_UWG: { filename: 'CH_UWG.pdf', language: 'de' },
|
||||
CH_FMG: { filename: 'CH_FMG.pdf', language: 'de' },
|
||||
CH_GEBUV: { filename: 'CH_GEBUV.pdf', language: 'de' },
|
||||
CH_ZERTES: { filename: 'CH_ZERTES.pdf', language: 'de' },
|
||||
CH_ZGB_PERS: { filename: 'CH_ZGB_PERS.pdf', language: 'de' },
|
||||
// LI
|
||||
LI_DSG: { filename: 'LI_DSG.pdf', language: 'de' },
|
||||
// Nationale DSG (andere EU)
|
||||
ES_LOPDGDD: { filename: 'ES_LOPDGDD.pdf', language: 'es' },
|
||||
IT_CODICE_PRIVACY: { filename: 'IT_CODICE_PRIVACY.pdf', language: 'it' },
|
||||
NL_UAVG: { filename: 'NL_UAVG.pdf', language: 'nl' },
|
||||
FR_CNIL_GUIDE: { filename: 'FR_CNIL_GUIDE.pdf', language: 'fr' },
|
||||
IE_DPA_2018: { filename: 'IE_DPA_2018.pdf', language: 'en' },
|
||||
UK_DPA_2018: { filename: 'UK_DPA_2018.pdf', language: 'en' },
|
||||
UK_GDPR: { filename: 'UK_GDPR.pdf', language: 'en' },
|
||||
NO_PERSONOPPLYSNINGSLOVEN: { filename: 'NO_PERSONOPPLYSNINGSLOVEN.pdf', language: 'no' },
|
||||
SE_DATASKYDDSLAG: { filename: 'SE_DATASKYDDSLAG.pdf', language: 'sv' },
|
||||
PL_UODO: { filename: 'PL_UODO.pdf', language: 'pl' },
|
||||
CZ_ZOU: { filename: 'CZ_ZOU.pdf', language: 'cs' },
|
||||
HU_INFOTV: { filename: 'HU_INFOTV.pdf', language: 'hu' },
|
||||
BE_DPA_LAW: { filename: 'BE_DPA_LAW.pdf', language: 'nl' },
|
||||
FI_TIETOSUOJALAKI: { filename: 'FI_TIETOSUOJALAKI.pdf', language: 'fi' },
|
||||
DK_DATABESKYTTELSESLOVEN: { filename: 'DK_DATABESKYTTELSESLOVEN.pdf', language: 'da' },
|
||||
LU_DPA_LAW: { filename: 'LU_DPA_LAW.pdf', language: 'fr' },
|
||||
// DE Gesetze (zusaetzlich)
|
||||
TMG_KOMPLETT: { filename: 'TMG_KOMPLETT.pdf', language: 'de' },
|
||||
DE_URHG: { filename: 'DE_URHG.pdf', language: 'de' },
|
||||
// EDPB Guidelines
|
||||
EDPB_GUIDELINES_5_2020: { filename: 'EDPB_GUIDELINES_5_2020.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_7_2020: { filename: 'EDPB_GUIDELINES_7_2020.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_1_2020: { filename: 'EDPB_GUIDELINES_1_2020.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_1_2022: { filename: 'EDPB_GUIDELINES_1_2022.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_2_2023: { filename: 'EDPB_GUIDELINES_2_2023.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_2_2024: { filename: 'EDPB_GUIDELINES_2_2024.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_4_2019: { filename: 'EDPB_GUIDELINES_4_2019.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_9_2022: { filename: 'EDPB_GUIDELINES_9_2022.pdf', language: 'en' },
|
||||
EDPB_DPIA_LIST: { filename: 'EDPB_DPIA_LIST.pdf', language: 'en' },
|
||||
EDPB_LEGITIMATE_INTEREST: { filename: 'EDPB_LEGITIMATE_INTEREST.pdf', language: 'en' },
|
||||
// EDPS
|
||||
EDPS_DPIA_LIST: { filename: 'EDPS_DPIA_LIST.pdf', language: 'en' },
|
||||
// Frameworks
|
||||
ENISA_SECURE_BY_DESIGN: { filename: 'ENISA_SECURE_BY_DESIGN.pdf', language: 'en' },
|
||||
ENISA_SUPPLY_CHAIN: { filename: 'ENISA_SUPPLY_CHAIN.pdf', language: 'en' },
|
||||
ENISA_THREAT_LANDSCAPE: { filename: 'ENISA_THREAT_LANDSCAPE.pdf', language: 'en' },
|
||||
ENISA_ICS_SCADA: { filename: 'ENISA_ICS_SCADA.pdf', language: 'en' },
|
||||
ENISA_CYBERSECURITY_2024: { filename: 'ENISA_CYBERSECURITY_2024.pdf', language: 'en' },
|
||||
NIST_SSDF: { filename: 'NIST_SSDF.pdf', language: 'en' },
|
||||
NIST_CSF_2: { filename: 'NIST_CSF_2.pdf', language: 'en' },
|
||||
OECD_AI_PRINCIPLES: { filename: 'OECD_AI_PRINCIPLES.pdf', language: 'en' },
|
||||
// EU-IFRS / EFRAG
|
||||
EU_IFRS_DE: { filename: 'EU_IFRS_DE.pdf', language: 'de' },
|
||||
EU_IFRS_EN: { filename: 'EU_IFRS_EN.pdf', language: 'en' },
|
||||
EFRAG_ENDORSEMENT: { filename: 'EFRAG_ENDORSEMENT.pdf', language: 'en' },
|
||||
}
|
||||
@@ -11,8 +11,6 @@ import React, { useState, useEffect, useCallback } from 'react'
|
||||
import Link from 'next/link'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { AIModuleSidebarResponsive } from '@/components/ai/AIModuleSidebar'
|
||||
import { REGULATIONS_IN_RAG } from './rag-constants'
|
||||
import { ChunkBrowserQA } from './components/ChunkBrowserQA'
|
||||
|
||||
// API uses local proxy route to klausur-service
|
||||
const API_PROXY = '/api/legal-corpus'
|
||||
@@ -75,7 +73,7 @@ interface DsfaCorpusStatus {
|
||||
type RegulationCategory = 'regulations' | 'dsfa' | 'nibis' | 'templates'
|
||||
|
||||
// Tab definitions
|
||||
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'chunks' | 'data' | 'ingestion' | 'pipeline'
|
||||
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'data' | 'ingestion' | 'pipeline'
|
||||
|
||||
// Custom document type
|
||||
interface CustomDocument {
|
||||
@@ -1013,264 +1011,8 @@ const REGULATIONS = [
|
||||
keyTopics: ['Bussgeldberechnung', 'Schweregrad', 'Milderungsgruende', 'Bussgeldrahmen'],
|
||||
effectiveDate: '2022'
|
||||
},
|
||||
// =====================================================================
|
||||
// Neu ingestierte EU-Richtlinien (Februar 2026)
|
||||
// =====================================================================
|
||||
{
|
||||
code: 'E_COMMERCE_RL',
|
||||
name: 'E-Commerce-Richtlinie',
|
||||
fullName: 'Richtlinie 2000/31/EG ueber den elektronischen Geschaeftsverkehr',
|
||||
type: 'eu_directive',
|
||||
expected: 30,
|
||||
description: 'EU-Richtlinie ueber den elektronischen Geschaeftsverkehr (E-Commerce). Regelt Herkunftslandprinzip, Informationspflichten, Haftungsprivilegien fuer Vermittler (Mere Conduit, Caching, Hosting).',
|
||||
relevantFor: ['Online-Dienste', 'E-Commerce', 'Hosting-Anbieter', 'Plattformen'],
|
||||
keyTopics: ['Herkunftslandprinzip', 'Haftungsprivileg', 'Informationspflichten', 'Spam-Verbot', 'Vermittlerhaftung'],
|
||||
effectiveDate: '17. Juli 2000'
|
||||
},
|
||||
{
|
||||
code: 'VERBRAUCHERRECHTE_RL',
|
||||
name: 'Verbraucherrechte-Richtlinie',
|
||||
fullName: 'Richtlinie 2011/83/EU ueber die Rechte der Verbraucher',
|
||||
type: 'eu_directive',
|
||||
expected: 25,
|
||||
description: 'EU-weite Harmonisierung der Verbraucherrechte bei Fernabsatz und aussergeschaeftlichen Vertraegen. 14-Tage-Widerrufsrecht, Informationspflichten, digitale Inhalte.',
|
||||
relevantFor: ['Online-Shops', 'E-Commerce', 'Fernabsatz', 'Dienstleister'],
|
||||
keyTopics: ['Widerrufsrecht 14 Tage', 'Informationspflichten', 'Fernabsatzvertraege', 'Digitale Inhalte'],
|
||||
effectiveDate: '13. Juni 2014'
|
||||
},
|
||||
{
|
||||
code: 'DIGITALE_INHALTE_RL',
|
||||
name: 'Digitale-Inhalte-Richtlinie',
|
||||
fullName: 'Richtlinie (EU) 2019/770 ueber digitale Inhalte und Dienstleistungen',
|
||||
type: 'eu_directive',
|
||||
expected: 20,
|
||||
description: 'Gewaehrleistungsrecht fuer digitale Inhalte und Dienstleistungen. Regelt Maengelhaftung, Updates, Vertragsmaessigkeit und Kuendigungsrechte bei digitalen Produkten.',
|
||||
relevantFor: ['SaaS-Anbieter', 'App-Entwickler', 'Cloud-Dienste', 'Streaming-Anbieter', 'Software-Hersteller'],
|
||||
keyTopics: ['Digitale Gewaehrleistung', 'Update-Pflicht', 'Vertragsmaessigkeit', 'Kuendigungsrecht', 'Datenportabilitaet'],
|
||||
effectiveDate: '1. Januar 2022'
|
||||
},
|
||||
{
|
||||
code: 'DMA',
|
||||
name: 'Digital Markets Act',
|
||||
fullName: 'Verordnung (EU) 2022/1925 - Digital Markets Act',
|
||||
type: 'eu_regulation',
|
||||
expected: 50,
|
||||
description: 'Reguliert digitale Gatekeeper-Plattformen. Stellt Verhaltensregeln fuer grosse Plattformen auf (Apple, Google, Meta, Amazon, Microsoft). Verbietet Selbstbevorzugung und erzwingt Interoperabilitaet.',
|
||||
relevantFor: ['Grosse Plattformen', 'App-Stores', 'Suchmaschinen', 'Social Media', 'Messenger-Dienste'],
|
||||
keyTopics: ['Gatekeeper-Pflichten', 'Interoperabilitaet', 'Selbstbevorzugung', 'App-Store-Regeln', 'Datenportabilitaet'],
|
||||
effectiveDate: '2. Mai 2023'
|
||||
},
|
||||
// === Industrie-Compliance (2026-02-28) ===
|
||||
{
|
||||
code: 'MACHINERY_REG',
|
||||
name: 'Maschinenverordnung',
|
||||
fullName: 'Verordnung (EU) 2023/1230 ueber Maschinen (Machinery Regulation)',
|
||||
type: 'eu_regulation',
|
||||
expected: 100,
|
||||
description: 'Loest die alte Maschinenrichtlinie 2006/42/EG ab. Regelt Sicherheitsanforderungen fuer Maschinen und zugehoerige Produkte, CE-Kennzeichnung, Konformitaetsbewertung und Marktaufsicht. Neu: Cybersecurity-Anforderungen fuer vernetzte Maschinen.',
|
||||
relevantFor: ['Maschinenbau', 'Industrie 4.0', 'Automatisierung', 'Hersteller', 'Importeure'],
|
||||
keyTopics: ['CE-Kennzeichnung', 'Konformitaetsbewertung', 'Risikobeurteilung', 'Cybersecurity', 'Betriebsanleitung'],
|
||||
effectiveDate: '20. Januar 2027'
|
||||
},
|
||||
{
|
||||
code: 'BLUE_GUIDE',
|
||||
name: 'Blue Guide',
|
||||
fullName: 'Leitfaden fuer die Umsetzung der EU-Produktvorschriften (Blue Guide 2022)',
|
||||
type: 'eu_guideline',
|
||||
expected: 200,
|
||||
description: 'Umfassender Leitfaden der EU-Kommission zur Umsetzung von Produktvorschriften. Erklaert CE-Kennzeichnung, Konformitaetsbewertungsverfahren, notifizierte Stellen, Marktaufsicht und den New Legislative Framework.',
|
||||
relevantFor: ['Hersteller', 'Importeure', 'Haendler', 'Notifizierte Stellen', 'Marktaufsichtsbehoerden'],
|
||||
keyTopics: ['CE-Kennzeichnung', 'Konformitaetserklaerung', 'Notifizierte Stellen', 'Marktaufsicht', 'New Legislative Framework'],
|
||||
effectiveDate: '29. Juni 2022'
|
||||
},
|
||||
{
|
||||
code: 'ENISA_SECURE_BY_DESIGN',
|
||||
name: 'ENISA Secure by Design',
|
||||
fullName: 'ENISA Secure Software Development Best Practices',
|
||||
type: 'eu_guideline',
|
||||
expected: 50,
|
||||
description: 'ENISA-Leitfaden fuer sichere Softwareentwicklung. Beschreibt Best Practices fuer Security by Design, sichere Entwicklungsprozesse und Schwachstellenmanagement.',
|
||||
relevantFor: ['Softwareentwickler', 'DevOps', 'IT-Sicherheit', 'Produktmanagement'],
|
||||
keyTopics: ['Security by Design', 'SDLC', 'Schwachstellenmanagement', 'Secure Coding', 'Threat Modeling'],
|
||||
effectiveDate: '2023'
|
||||
},
|
||||
{
|
||||
code: 'ENISA_SUPPLY_CHAIN',
|
||||
name: 'ENISA Supply Chain Security',
|
||||
fullName: 'ENISA Threat Landscape for Supply Chain Attacks',
|
||||
type: 'eu_guideline',
|
||||
expected: 50,
|
||||
description: 'ENISA-Analyse der Bedrohungslandschaft fuer Supply-Chain-Angriffe. Beschreibt Angriffsvektoren, Taxonomie und Empfehlungen zur Absicherung von Software-Lieferketten.',
|
||||
relevantFor: ['IT-Sicherheit', 'Beschaffung', 'Softwareentwickler', 'CISO'],
|
||||
keyTopics: ['Supply Chain Security', 'SolarWinds', 'SBOM', 'Lieferantenrisiko', 'Third-Party Risk'],
|
||||
effectiveDate: '2021'
|
||||
},
|
||||
{
|
||||
code: 'NIST_SSDF',
|
||||
name: 'NIST SSDF',
|
||||
fullName: 'NIST SP 800-218 — Secure Software Development Framework (SSDF)',
|
||||
type: 'international_standard',
|
||||
expected: 40,
|
||||
description: 'NIST-Framework fuer sichere Softwareentwicklung. Definiert Praktiken und Aufgaben in vier Gruppen: Prepare, Protect, Produce, Respond. Weit verbreitet als Referenz fuer Software Supply Chain Security.',
|
||||
relevantFor: ['Softwareentwickler', 'DevSecOps', 'IT-Sicherheit', 'Compliance-Manager'],
|
||||
keyTopics: ['SSDF', 'Secure SDLC', 'Software Supply Chain', 'Vulnerability Management', 'Code Review'],
|
||||
effectiveDate: '3. Februar 2022'
|
||||
},
|
||||
{
|
||||
code: 'NIST_CSF_2',
|
||||
name: 'NIST CSF 2.0',
|
||||
fullName: 'NIST Cybersecurity Framework (CSF) 2.0',
|
||||
type: 'international_standard',
|
||||
expected: 50,
|
||||
description: 'Version 2.0 des NIST Cybersecurity Framework. Neue Kernfunktion "Govern" ergaenzt Identify, Protect, Detect, Respond, Recover. Erweitert den Anwendungsbereich ueber kritische Infrastruktur hinaus auf alle Organisationen.',
|
||||
relevantFor: ['CISO', 'IT-Sicherheit', 'Risikomanagement', 'Geschaeftsfuehrung', 'Alle Branchen'],
|
||||
keyTopics: ['Govern', 'Identify', 'Protect', 'Detect', 'Respond', 'Recover', 'Cybersecurity'],
|
||||
effectiveDate: '26. Februar 2024'
|
||||
},
|
||||
{
|
||||
code: 'OECD_AI_PRINCIPLES',
|
||||
name: 'OECD AI Principles',
|
||||
fullName: 'OECD Recommendation on Artificial Intelligence (AI Principles)',
|
||||
type: 'international_standard',
|
||||
expected: 20,
|
||||
description: 'OECD-Empfehlung zu Kuenstlicher Intelligenz. Definiert fuenf Prinzipien fuer verantwortungsvolle KI: Inklusives Wachstum, Menschenzentrierte Werte, Transparenz, Robustheit und Rechenschaftspflicht. Von 46 Laendern angenommen.',
|
||||
relevantFor: ['KI-Entwickler', 'Policy-Maker', 'Ethik-Kommissionen', 'Geschaeftsfuehrung'],
|
||||
keyTopics: ['AI Ethics', 'Transparenz', 'Accountability', 'Trustworthy AI', 'Human-Centered AI'],
|
||||
effectiveDate: '22. Mai 2019'
|
||||
},
|
||||
{
|
||||
code: 'EU_IFRS',
|
||||
name: 'EU-IFRS',
|
||||
fullName: 'Verordnung (EU) 2023/1803 — International Financial Reporting Standards',
|
||||
type: 'eu_regulation',
|
||||
expected: 500,
|
||||
description: 'Konsolidierte Fassung der von der EU uebernommenen IFRS/IAS/IFRIC/SIC. Rechtsverbindlich fuer boersennotierte EU-Unternehmen. Enthalt IFRS 1-17, IAS 1-41, IFRIC 1-23 und SIC 7-32 in der EU-endorsed Fassung (Stand Okt 2023). ACHTUNG: Neuere IASB-Standards sind moeglicherweise noch nicht EU-endorsed.',
|
||||
relevantFor: ['Rechnungswesen', 'Wirtschaftspruefer', 'boersennotierte Unternehmen', 'Finanzberichterstattung', 'CFO'],
|
||||
keyTopics: ['IFRS 16 Leasing', 'IFRS 9 Finanzinstrumente', 'IAS 1 Darstellung', 'IFRS 15 Erloese', 'IFRS 17 Versicherungsvertraege', 'Konsolidierung'],
|
||||
effectiveDate: '16. Oktober 2023'
|
||||
},
|
||||
{
|
||||
code: 'EFRAG_ENDORSEMENT',
|
||||
name: 'EFRAG Endorsement Status',
|
||||
fullName: 'EFRAG EU Endorsement Status Report (Dezember 2025)',
|
||||
type: 'eu_guideline',
|
||||
expected: 30,
|
||||
description: 'Uebersicht des European Financial Reporting Advisory Group (EFRAG) ueber den EU-Endorsement-Stand aller IFRS/IAS-Standards. Zeigt welche Standards von der EU uebernommen wurden und welche noch ausstehend sind. Relevant fuer internationale Ausschreibungen und Compliance-Pruefung.',
|
||||
relevantFor: ['Rechnungswesen', 'Wirtschaftspruefer', 'Compliance Officer', 'internationale Ausschreibungen'],
|
||||
keyTopics: ['EU Endorsement', 'IFRS 18', 'IFRS S1/S2 Sustainability', 'Endorsement Status', 'IASB Updates'],
|
||||
effectiveDate: '18. Dezember 2025'
|
||||
},
|
||||
]
|
||||
|
||||
// Source URLs for original documents (click to view original)
|
||||
const REGULATION_SOURCES: Record<string, string> = {
|
||||
// EU Verordnungen/Richtlinien (EUR-Lex)
|
||||
GDPR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32016R0679',
|
||||
EPRIVACY: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32002L0058',
|
||||
SCC: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32021D0914',
|
||||
DPF: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023D1795',
|
||||
AIACT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R1689',
|
||||
CRA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R2847',
|
||||
NIS2: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022L2555',
|
||||
EUCSA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019R0881',
|
||||
DATAACT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R2854',
|
||||
DGA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R0868',
|
||||
DSA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R2065',
|
||||
EAA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0882',
|
||||
DSM: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0790',
|
||||
PLD: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024L2853',
|
||||
GPSR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R0988',
|
||||
DORA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R2554',
|
||||
PSD2: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32015L2366',
|
||||
AMLR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R1624',
|
||||
MiCA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1114',
|
||||
EHDS: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32025R0327',
|
||||
SCC_FULL_TEXT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32021D0914',
|
||||
E_COMMERCE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32000L0031',
|
||||
VERBRAUCHERRECHTE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32011L0083',
|
||||
DIGITALE_INHALTE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0770',
|
||||
DMA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R1925',
|
||||
MACHINERY_REG: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1230',
|
||||
BLUE_GUIDE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:52022XC0629(04)',
|
||||
EU_IFRS: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
|
||||
// EDPB Guidelines
|
||||
EDPB_GUIDELINES_2_2019: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-22019-processing-personal-data-under-article-61b_en',
|
||||
EDPB_GUIDELINES_3_2019: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-32019-processing-personal-data-through-video_en',
|
||||
EDPB_GUIDELINES_5_2020: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-052020-consent-under-regulation-2016679_en',
|
||||
EDPB_GUIDELINES_7_2020: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-072020-concepts-controller-and-processor-gdpr_en',
|
||||
EDPB_GUIDELINES_1_2022: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-042022-calculation-administrative-fines-under-gdpr_en',
|
||||
// BSI Technische Richtlinien
|
||||
'BSI-TR-03161-1': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-1.html',
|
||||
'BSI-TR-03161-2': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-2.html',
|
||||
'BSI-TR-03161-3': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-3.html',
|
||||
// Nationale Datenschutzgesetze
|
||||
AT_DSG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001597',
|
||||
BDSG_FULL: 'https://www.gesetze-im-internet.de/bdsg_2018/',
|
||||
CH_DSG: 'https://www.fedlex.admin.ch/eli/cc/2022/491/de',
|
||||
LI_DSG: 'https://www.gesetze.li/konso/2018.272',
|
||||
BE_DPA_LAW: 'https://www.autoriteprotectiondonnees.be/citoyen/la-loi-du-30-juillet-2018',
|
||||
NL_UAVG: 'https://wetten.overheid.nl/BWBR0040940/',
|
||||
FR_CNIL_GUIDE: 'https://www.cnil.fr/fr/rgpd-par-ou-commencer',
|
||||
ES_LOPDGDD: 'https://www.boe.es/buscar/act.php?id=BOE-A-2018-16673',
|
||||
IT_CODICE_PRIVACY: 'https://www.garanteprivacy.it/home/docweb/-/docweb-display/docweb/9042678',
|
||||
IE_DPA_2018: 'https://www.irishstatutebook.ie/eli/2018/act/7/enacted/en/html',
|
||||
UK_DPA_2018: 'https://www.legislation.gov.uk/ukpga/2018/12/contents',
|
||||
UK_GDPR: 'https://www.legislation.gov.uk/eur/2016/679/contents',
|
||||
NO_PERSONOPPLYSNINGSLOVEN: 'https://lovdata.no/dokument/NL/lov/2018-06-15-38',
|
||||
SE_DATASKYDDSLAG: 'https://www.riksdagen.se/sv/dokument-och-lagar/dokument/svensk-forfattningssamling/lag-2018218-med-kompletterande-bestammelser_sfs-2018-218/',
|
||||
FI_TIETOSUOJALAKI: 'https://www.finlex.fi/fi/laki/ajantasa/2018/20181050',
|
||||
PL_UODO: 'https://isap.sejm.gov.pl/isap.nsf/DocDetails.xsp?id=WDU20180001000',
|
||||
CZ_ZOU: 'https://www.zakonyprolidi.cz/cs/2019-110',
|
||||
HU_INFOTV: 'https://net.jogtar.hu/jogszabaly?docid=a1100112.tv',
|
||||
LU_DPA_LAW: 'https://legilux.public.lu/eli/etat/leg/loi/2018/08/01/a686/jo',
|
||||
DK_DATABESKYTTELSESLOVEN: 'https://www.retsinformation.dk/eli/lta/2018/502',
|
||||
// Deutschland — Weitere Gesetze
|
||||
TDDDG: 'https://www.gesetze-im-internet.de/tdddg/',
|
||||
DE_DDG: 'https://www.gesetze-im-internet.de/ddg/',
|
||||
DE_BGB_AGB: 'https://www.gesetze-im-internet.de/bgb/__305.html',
|
||||
DE_EGBGB: 'https://www.gesetze-im-internet.de/bgbeg/art_246.html',
|
||||
DE_UWG: 'https://www.gesetze-im-internet.de/uwg_2004/',
|
||||
DE_HGB_RET: 'https://www.gesetze-im-internet.de/hgb/__257.html',
|
||||
DE_AO_RET: 'https://www.gesetze-im-internet.de/ao_1977/__147.html',
|
||||
DE_TKG: 'https://www.gesetze-im-internet.de/tkg_2021/',
|
||||
DE_PANGV: 'https://www.gesetze-im-internet.de/pangv_2022/',
|
||||
DE_DLINFOV: 'https://www.gesetze-im-internet.de/dlinfov/',
|
||||
DE_BETRVG: 'https://www.gesetze-im-internet.de/betrvg/__87.html',
|
||||
DE_GESCHGEHG: 'https://www.gesetze-im-internet.de/geschgehg/',
|
||||
DE_BSIG: 'https://www.gesetze-im-internet.de/bsig_2009/',
|
||||
DE_USTG_RET: 'https://www.gesetze-im-internet.de/ustg_1980/__14b.html',
|
||||
// Oesterreich — Weitere Gesetze
|
||||
AT_ECG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20001703',
|
||||
AT_TKG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20007898',
|
||||
AT_KSCHG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10002462',
|
||||
AT_FAGG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20008783',
|
||||
AT_UGB_RET: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001702',
|
||||
AT_BAO_RET: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10003940',
|
||||
AT_MEDIENG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10000719',
|
||||
AT_ABGB_AGB: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001622',
|
||||
AT_UWG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10002665',
|
||||
// Schweiz
|
||||
CH_DSV: 'https://www.fedlex.admin.ch/eli/cc/2022/568/de',
|
||||
CH_OR_AGB: 'https://www.fedlex.admin.ch/eli/cc/27/317_321_377/de',
|
||||
CH_UWG: 'https://www.fedlex.admin.ch/eli/cc/1988/223_223_223/de',
|
||||
CH_FMG: 'https://www.fedlex.admin.ch/eli/cc/1997/2187_2187_2187/de',
|
||||
CH_GEBUV: 'https://www.fedlex.admin.ch/eli/cc/2002/249/de',
|
||||
CH_ZERTES: 'https://www.fedlex.admin.ch/eli/cc/2016/752/de',
|
||||
CH_ZGB_PERS: 'https://www.fedlex.admin.ch/eli/cc/24/233_245_233/de',
|
||||
// Industrie-Compliance
|
||||
ENISA_SECURE_BY_DESIGN: 'https://www.enisa.europa.eu/publications/secure-development-best-practices',
|
||||
ENISA_SUPPLY_CHAIN: 'https://www.enisa.europa.eu/publications/threat-landscape-for-supply-chain-attacks',
|
||||
NIST_SSDF: 'https://csrc.nist.gov/pubs/sp/800/218/final',
|
||||
NIST_CSF_2: 'https://www.nist.gov/cyberframework',
|
||||
OECD_AI_PRINCIPLES: 'https://legalinstruments.oecd.org/en/instruments/OECD-LEGAL-0449',
|
||||
// IFRS / EFRAG
|
||||
EU_IFRS_DE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
|
||||
EU_IFRS_EN: 'https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32023R1803',
|
||||
EFRAG_ENDORSEMENT: 'https://www.efrag.org/activities/endorsement-status-report',
|
||||
// Full-text Datenschutzgesetz AT
|
||||
AT_DSG_FULL: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001597',
|
||||
}
|
||||
|
||||
// License info for each regulation
|
||||
const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string }> = {
|
||||
GDPR: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk der EU — frei verwendbar' },
|
||||
@@ -1321,18 +1063,6 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
|
||||
EDPB_GUIDELINES_3_2019: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
EDPB_GUIDELINES_5_2020: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
EDPB_GUIDELINES_7_2020: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
// Industrie-Compliance (2026-02-28)
|
||||
MACHINERY_REG: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
BLUE_GUIDE: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Leitfaden — amtliches Werk der Kommission' },
|
||||
ENISA_SECURE_BY_DESIGN: { license: 'CC-BY-4.0', licenseNote: 'ENISA Publication — CC BY 4.0' },
|
||||
ENISA_SUPPLY_CHAIN: { license: 'CC-BY-4.0', licenseNote: 'ENISA Publication — CC BY 4.0' },
|
||||
NIST_SSDF: { license: 'PUBLIC_DOMAIN', licenseNote: 'US Government Work — Public Domain' },
|
||||
NIST_CSF_2: { license: 'PUBLIC_DOMAIN', licenseNote: 'US Government Work — Public Domain' },
|
||||
OECD_AI_PRINCIPLES: { license: 'PUBLIC_DOMAIN', licenseNote: 'OECD Legal Instrument — Reuse Notice' },
|
||||
// EU-IFRS / EFRAG (2026-02-28)
|
||||
EU_IFRS_DE: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
EU_IFRS_EN: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
EFRAG_ENDORSEMENT: { license: 'PUBLIC_DOMAIN', licenseNote: 'EFRAG — oeffentliches Dokument' },
|
||||
// DACH National Laws — Deutschland
|
||||
DE_DDG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Deutsches Bundesgesetz — amtliches Werk (§5 UrhG)' },
|
||||
DE_BGB_AGB: { license: 'PUBLIC_DOMAIN', licenseNote: 'Deutsches Bundesgesetz — amtliches Werk (§5 UrhG)' },
|
||||
@@ -1369,35 +1099,6 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
|
||||
LU_DPA_LAW: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Luxemburg — frei verwendbar' },
|
||||
DK_DATABESKYTTELSESLOVEN: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Daenemark — frei verwendbar' },
|
||||
EDPB_GUIDELINES_1_2022: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
// Neue EU-Richtlinien (Februar 2026 ingestiert)
|
||||
E_COMMERCE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
VERBRAUCHERRECHTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
DIGITALE_INHALTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
DMA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
}
|
||||
|
||||
// REGULATIONS_IN_RAG is imported from ./rag-constants.ts
|
||||
|
||||
// Helper: Check if regulation is in RAG
|
||||
const isInRag = (code: string): boolean => code in REGULATIONS_IN_RAG
|
||||
|
||||
// Helper: Get known chunk count for a regulation
|
||||
const getKnownChunks = (code: string): number => REGULATIONS_IN_RAG[code]?.chunks || 0
|
||||
|
||||
// Known collection totals (updated: 2026-03-12)
|
||||
// Note: bp_compliance_datenschutz expanded via edpb-crawler.py (55 EDPB/WP29/EDPS documents).
|
||||
// bp_dsfa_corpus expanded with 20 DSFA Muss-Listen (BfDI + DSK + 16 Bundeslaender).
|
||||
// bp_compliance_gesetze: +5263 Chunks durch Phase H Verbraucherschutz (Run #701, inkl. BDSG/DDG/TKG/HGB/AO Duplikate)
|
||||
const COLLECTION_TOTALS = {
|
||||
bp_compliance_gesetze: 63567, // 58304 + 5263 (Phase H)
|
||||
bp_compliance_ce: 18183,
|
||||
bp_legal_templates: 7689,
|
||||
bp_compliance_datenschutz: 17459,
|
||||
bp_dsfa_corpus: 8666,
|
||||
bp_compliance_recht: 1425,
|
||||
bp_nibis_eh: 7996,
|
||||
total_legal: 81750, // gesetze + ce
|
||||
total_all: 124985,
|
||||
}
|
||||
|
||||
// License display labels
|
||||
@@ -1451,10 +1152,7 @@ const INDUSTRY_REGULATION_MAP: Record<string, string[]> = {
|
||||
all: ['GDPR', 'EPRIVACY', 'TDDDG'],
|
||||
health: ['GDPR', 'TDDDG', 'BSI-TR-03161-1', 'BSI-TR-03161-2', 'BSI-TR-03161-3', 'NIS2', 'AIACT', 'PLD', 'EHDS'],
|
||||
finance: ['GDPR', 'TDDDG', 'NIS2', 'EUCSA', 'DSA', 'AIACT', 'DPF', 'DORA', 'PSD2', 'AMLR', 'MiCA'],
|
||||
ecommerce: ['GDPR', 'TDDDG', 'DSA', 'GPSR', 'EAA', 'PLD', 'DPF', 'PSD2',
|
||||
'DE_PANGV', 'DE_VSBG', 'DE_PRODHAFTG', 'DE_VERPACKG', 'DE_ELEKTROG', 'DE_BFSG', 'DE_UWG',
|
||||
'E_COMMERCE_RL', 'VERBRAUCHERRECHTE_RL', 'WARENKAUF_RL', 'KLAUSEL_RL', 'UNLAUTERE_PRAKTIKEN_RL',
|
||||
'PREISANGABEN_RL', 'OMNIBUS_RL', 'DIGITALE_INHALTE_RL'],
|
||||
ecommerce: ['GDPR', 'TDDDG', 'DSA', 'GPSR', 'EAA', 'PLD', 'DPF', 'PSD2'],
|
||||
tech: ['GDPR', 'TDDDG', 'CRA', 'AIACT', 'DPF', 'SCC', 'DATAACT', 'DSM', 'MiCA'],
|
||||
iot: ['GDPR', 'CRA', 'GPSR', 'PLD', 'DATAACT', 'AIACT'],
|
||||
ai: ['GDPR', 'AIACT', 'PLD', 'DSM', 'DATAACT'],
|
||||
@@ -1514,15 +1212,6 @@ const THEMATIC_GROUPS = [
|
||||
regulations: ['EHDS', 'BSI-TR-03161-1', 'BSI-TR-03161-2', 'BSI-TR-03161-3'],
|
||||
description: 'Gesundheitsdatenraum, DiGA-Sicherheit, Patientenrechte'
|
||||
},
|
||||
{
|
||||
id: 'verbraucherschutz',
|
||||
name: 'Verbraucherschutz & E-Commerce',
|
||||
color: 'bg-amber-500',
|
||||
regulations: ['DE_PANGV', 'DE_VSBG', 'DE_PRODHAFTG', 'DE_UWG', 'DE_BFSG',
|
||||
'WARENKAUF_RL', 'KLAUSEL_RL', 'UNLAUTERE_PRAKTIKEN_RL', 'PREISANGABEN_RL',
|
||||
'OMNIBUS_RL', 'E_COMMERCE_RL', 'VERBRAUCHERRECHTE_RL', 'DIGITALE_INHALTE_RL'],
|
||||
description: 'Widerrufsrecht, Preisangaben, Fernabsatz, AGB-Recht, Barrierefreiheit'
|
||||
},
|
||||
]
|
||||
|
||||
// Key overlaps and intersections
|
||||
@@ -1755,8 +1444,6 @@ export default function RAGPage() {
|
||||
const [autoRefresh, setAutoRefresh] = useState(true)
|
||||
const [elapsedTime, setElapsedTime] = useState<string>('')
|
||||
|
||||
// Chunk browser state is now in ChunkBrowserQA component
|
||||
|
||||
// DSFA corpus state
|
||||
const [dsfaSources, setDsfaSources] = useState<DsfaSource[]>([])
|
||||
const [dsfaStatus, setDsfaStatus] = useState<DsfaCorpusStatus | null>(null)
|
||||
@@ -2002,8 +1689,6 @@ export default function RAGPage() {
|
||||
return () => clearInterval(interval)
|
||||
}, [pipelineState?.started_at, pipelineState?.status])
|
||||
|
||||
// Chunk browser functions are now in ChunkBrowserQA component
|
||||
|
||||
const handleSearch = async () => {
|
||||
if (!searchQuery.trim()) return
|
||||
|
||||
@@ -2089,7 +1774,6 @@ export default function RAGPage() {
|
||||
{ id: 'regulations' as TabId, name: 'Regulierungen', icon: '📜' },
|
||||
{ id: 'map' as TabId, name: 'Landkarte', icon: '🗺️' },
|
||||
{ id: 'search' as TabId, name: 'Suche', icon: '🔍' },
|
||||
{ id: 'chunks' as TabId, name: 'Chunk-Browser', icon: '🧩' },
|
||||
{ id: 'data' as TabId, name: 'Daten', icon: '📁' },
|
||||
{ id: 'ingestion' as TabId, name: 'Ingestion', icon: '⚙️' },
|
||||
{ id: 'pipeline' as TabId, name: 'Pipeline', icon: '🔄' },
|
||||
@@ -2120,7 +1804,7 @@ export default function RAGPage() {
|
||||
{/* Page Purpose */}
|
||||
<PagePurpose
|
||||
title="Daten & RAG"
|
||||
purpose={`Verwalten und durchsuchen Sie 7 RAG-Collections mit ${REGULATIONS.length} Regulierungen (${Object.keys(REGULATIONS_IN_RAG).length} im RAG). Legal Corpus, DSFA Corpus (70+ Quellen), NiBiS EH (Bildungsinhalte) und Legal Templates. Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur.`}
|
||||
purpose="Verwalten und durchsuchen Sie 4 RAG-Collections: Legal Corpus (24 Regulierungen), DSFA Corpus (70+ Quellen inkl. internationaler Datenschutzgesetze), NiBiS EH (Bildungsinhalte) und Legal Templates (Dokumentvorlagen). Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur."
|
||||
audience={['DSB', 'Compliance Officer', 'Entwickler']}
|
||||
gdprArticles={['§5 UrhG (Amtliche Werke)', 'Art. 5 DSGVO (Rechenschaftspflicht)']}
|
||||
architecture={{
|
||||
@@ -2142,8 +1826,8 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-blue-600 uppercase mb-1">Legal Corpus</p>
|
||||
<p className="text-2xl font-bold text-slate-900">{COLLECTION_TOTALS.total_legal.toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500">Chunks · {Object.keys(REGULATIONS_IN_RAG).length}/{REGULATIONS.length} im RAG</p>
|
||||
<p className="text-2xl font-bold text-slate-900">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500">Chunks · {REGULATIONS.length} Regulierungen</p>
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-purple-600 uppercase mb-1">DSFA Corpus</p>
|
||||
@@ -2152,12 +1836,12 @@ export default function RAGPage() {
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-emerald-600 uppercase mb-1">NiBiS EH</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.996</p>
|
||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
||||
<p className="text-xs text-slate-500">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-orange-600 uppercase mb-1">Legal Templates</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.689</p>
|
||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
||||
<p className="text-xs text-slate-500">Chunks · Dokumentvorlagen</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -2192,8 +1876,8 @@ export default function RAGPage() {
|
||||
className="p-4 rounded-lg border border-blue-200 bg-blue-50 hover:bg-blue-100 transition-colors text-left"
|
||||
>
|
||||
<p className="text-xs font-medium text-blue-600 uppercase">Gesetze & Regulierungen</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">{COLLECTION_TOTALS.total_legal.toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500 mt-1">{Object.keys(REGULATIONS_IN_RAG).length}/{REGULATIONS.length} im RAG</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500 mt-1">{REGULATIONS.length} Regulierungen (EU, DE, BSI)</p>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => { setRegulationCategory('dsfa'); setActiveTab('regulations') }}
|
||||
@@ -2205,12 +1889,12 @@ export default function RAGPage() {
|
||||
</button>
|
||||
<div className="p-4 rounded-lg border border-emerald-200 bg-emerald-50 text-left">
|
||||
<p className="text-xs font-medium text-emerald-600 uppercase">NiBiS EH</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">7.996</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">28.662</p>
|
||||
<p className="text-xs text-slate-500 mt-1">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||
</div>
|
||||
<div className="p-4 rounded-lg border border-orange-200 bg-orange-50 text-left">
|
||||
<p className="text-xs font-medium text-orange-600 uppercase">Legal Templates</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">7.689</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">824</p>
|
||||
<p className="text-xs text-slate-500 mt-1">Chunks · Dokumentvorlagen (VVT, TOM, DSFA)</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -2220,13 +1904,12 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-1 md:grid-cols-4 gap-4">
|
||||
{Object.entries(TYPE_LABELS).map(([type, label]) => {
|
||||
const regs = REGULATIONS.filter((r) => r.type === type)
|
||||
const inRagCount = regs.filter((r) => isInRag(r.code)).length
|
||||
const totalChunks = regs.reduce((sum, r) => sum + getKnownChunks(r.code), 0)
|
||||
const totalChunks = regs.reduce((sum, r) => sum + getRegulationChunks(r.code), 0)
|
||||
return (
|
||||
<div key={type} className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<div className="flex items-center gap-2 mb-2">
|
||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[type]}`}>{label}</span>
|
||||
<span className="text-slate-500 text-sm">{inRagCount}/{regs.length} im RAG</span>
|
||||
<span className="text-slate-500 text-sm">{regs.length} Dok.</span>
|
||||
</div>
|
||||
<p className="text-xl font-bold text-slate-900">{totalChunks.toLocaleString()} Chunks</p>
|
||||
</div>
|
||||
@@ -2240,25 +1923,20 @@ export default function RAGPage() {
|
||||
<h3 className="font-semibold text-slate-900">Top Regulierungen (nach Chunks)</h3>
|
||||
</div>
|
||||
<div className="divide-y">
|
||||
{[...REGULATIONS].sort((a, b) => getKnownChunks(b.code) - getKnownChunks(a.code))
|
||||
.slice(0, 10)
|
||||
{REGULATIONS.sort((a, b) => getRegulationChunks(b.code) - getRegulationChunks(a.code))
|
||||
.slice(0, 5)
|
||||
.map((reg) => {
|
||||
const chunks = getKnownChunks(reg.code)
|
||||
const chunks = getRegulationChunks(reg.code)
|
||||
return (
|
||||
<div key={reg.code} className="px-4 py-3 flex items-center justify-between">
|
||||
<div className="flex items-center gap-3">
|
||||
{isInRag(reg.code) ? (
|
||||
<span className="text-green-500 text-sm">✓</span>
|
||||
) : (
|
||||
<span className="text-red-400 text-sm">✗</span>
|
||||
)}
|
||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
||||
{TYPE_LABELS[reg.type]}
|
||||
</span>
|
||||
<span className="font-medium text-slate-900">{reg.name}</span>
|
||||
<span className="text-slate-500 text-sm">({reg.code})</span>
|
||||
</div>
|
||||
<span className={`font-bold ${chunks > 0 ? 'text-teal-600' : 'text-slate-300'}`}>{chunks > 0 ? chunks.toLocaleString() + ' Chunks' : '—'}</span>
|
||||
<span className="font-bold text-teal-600">{chunks.toLocaleString()} Chunks</span>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
@@ -2317,13 +1995,7 @@ export default function RAGPage() {
|
||||
{regulationCategory === 'regulations' && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
||||
<div className="px-4 py-3 border-b bg-slate-50 flex items-center justify-between">
|
||||
<h3 className="font-semibold text-slate-900">
|
||||
Alle {REGULATIONS.length} Regulierungen
|
||||
<span className="ml-2 text-sm font-normal text-slate-500">
|
||||
({REGULATIONS.filter(r => isInRag(r.code)).length} im RAG,{' '}
|
||||
{REGULATIONS.filter(r => !isInRag(r.code)).length} ausstehend)
|
||||
</span>
|
||||
</h3>
|
||||
<h3 className="font-semibold text-slate-900">Alle {REGULATIONS.length} Regulierungen</h3>
|
||||
<button
|
||||
onClick={fetchStatus}
|
||||
className="text-sm text-teal-600 hover:text-teal-700"
|
||||
@@ -2335,7 +2007,6 @@ export default function RAGPage() {
|
||||
<table className="w-full">
|
||||
<thead className="bg-slate-50 border-b">
|
||||
<tr>
|
||||
<th className="px-4 py-3 text-center text-xs font-medium text-slate-500 uppercase w-12">RAG</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Code</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Typ</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Name</th>
|
||||
@@ -2346,10 +2017,17 @@ export default function RAGPage() {
|
||||
</thead>
|
||||
<tbody className="divide-y">
|
||||
{REGULATIONS.map((reg) => {
|
||||
const chunks = getKnownChunks(reg.code)
|
||||
const inRag = isInRag(reg.code)
|
||||
let statusColor = inRag ? 'text-green-500' : 'text-red-500'
|
||||
let statusIcon = inRag ? '✓' : '❌'
|
||||
const chunks = getRegulationChunks(reg.code)
|
||||
const ratio = chunks / (reg.expected * 10) // Rough estimate: 10 chunks per requirement
|
||||
let statusColor = 'text-red-500'
|
||||
let statusIcon = '❌'
|
||||
if (ratio > 0.5) {
|
||||
statusColor = 'text-green-500'
|
||||
statusIcon = '✓'
|
||||
} else if (ratio > 0.1) {
|
||||
statusColor = 'text-yellow-500'
|
||||
statusIcon = '⚠'
|
||||
}
|
||||
const isExpanded = expandedRegulation === reg.code
|
||||
|
||||
return (
|
||||
@@ -2358,13 +2036,6 @@ export default function RAGPage() {
|
||||
onClick={() => setExpandedRegulation(isExpanded ? null : reg.code)}
|
||||
className="hover:bg-slate-50 cursor-pointer transition-colors"
|
||||
>
|
||||
<td className="px-4 py-3 text-center">
|
||||
{isInRag(reg.code) ? (
|
||||
<span className="inline-flex items-center justify-center w-6 h-6 bg-green-100 text-green-600 rounded-full text-xs font-bold" title="Im RAG vorhanden">✓</span>
|
||||
) : (
|
||||
<span className="inline-flex items-center justify-center w-6 h-6 bg-red-50 text-red-400 rounded-full text-xs font-bold" title="Nicht im RAG">✗</span>
|
||||
)}
|
||||
</td>
|
||||
<td className="px-4 py-3 font-mono font-medium text-teal-600">
|
||||
<span className="inline-flex items-center gap-2">
|
||||
<span className={`transform transition-transform ${isExpanded ? 'rotate-90' : ''}`}>▶</span>
|
||||
@@ -2377,20 +2048,13 @@ export default function RAGPage() {
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-3 text-slate-900">{reg.name}</td>
|
||||
<td className="px-4 py-3 text-right font-bold">
|
||||
<span className={chunks > 0 && chunks < 10 && reg.expected >= 10 ? 'text-amber-600' : ''}>
|
||||
{chunks.toLocaleString()}
|
||||
{chunks > 0 && chunks < 10 && reg.expected >= 10 && (
|
||||
<span className="ml-1 inline-block w-4 h-4 text-[10px] leading-4 text-center bg-amber-100 text-amber-700 rounded-full" title="Verdaechtig niedrig — Ingestion pruefen">⚠</span>
|
||||
)}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-3 text-right font-bold">{chunks.toLocaleString()}</td>
|
||||
<td className="px-4 py-3 text-right text-slate-500">{reg.expected}</td>
|
||||
<td className={`px-4 py-3 text-center ${statusColor}`}>{statusIcon}</td>
|
||||
</tr>
|
||||
{isExpanded && (
|
||||
<tr key={`${reg.code}-detail`} className="bg-slate-50">
|
||||
<td colSpan={7} className="px-4 py-4">
|
||||
<td colSpan={6} className="px-4 py-4">
|
||||
<div className="bg-white rounded-lg border border-slate-200 p-4 space-y-3">
|
||||
<div>
|
||||
<h4 className="font-semibold text-slate-900 mb-1">{reg.fullName}</h4>
|
||||
@@ -2430,28 +2094,16 @@ export default function RAGPage() {
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center gap-3">
|
||||
{REGULATION_SOURCES[reg.code] && (
|
||||
<a
|
||||
href={REGULATION_SOURCES[reg.code]}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
className="text-blue-600 hover:text-blue-700 font-medium"
|
||||
>
|
||||
Originalquelle →
|
||||
</a>
|
||||
)}
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
setActiveTab('chunks')
|
||||
}}
|
||||
className="text-teal-600 hover:text-teal-700 font-medium"
|
||||
>
|
||||
In Chunks suchen →
|
||||
</button>
|
||||
</div>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
setSearchQuery(reg.name)
|
||||
setActiveTab('search')
|
||||
}}
|
||||
className="text-teal-600 hover:text-teal-700 font-medium"
|
||||
>
|
||||
In Chunks suchen →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
@@ -2580,7 +2232,7 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Chunks</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.996</p>
|
||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
||||
</div>
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Vector Size</p>
|
||||
@@ -2612,7 +2264,7 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Chunks</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.689</p>
|
||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
||||
</div>
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Vector Size</p>
|
||||
@@ -2680,28 +2332,20 @@ export default function RAGPage() {
|
||||
</div>
|
||||
</div>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
|
||||
{regs.map((reg) => {
|
||||
const regInRag = isInRag(reg.code)
|
||||
return (
|
||||
{regs.map((reg) => (
|
||||
<div
|
||||
key={reg.code}
|
||||
className={`bg-white p-3 rounded-lg border ${regInRag ? 'border-green-200' : 'border-slate-200'}`}
|
||||
className="bg-white p-3 rounded-lg border border-slate-200"
|
||||
>
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
||||
{reg.code}
|
||||
</span>
|
||||
{regInRag ? (
|
||||
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-green-100 text-green-600 rounded">RAG</span>
|
||||
) : (
|
||||
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-red-50 text-red-400 rounded">✗</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="font-medium text-sm text-slate-900">{reg.name}</div>
|
||||
<div className="text-xs text-slate-500 mt-1 line-clamp-2">{reg.description}</div>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
))}
|
||||
</div>
|
||||
</>
|
||||
)
|
||||
@@ -2728,22 +2372,17 @@ export default function RAGPage() {
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{group.regulations.map((code) => {
|
||||
const reg = REGULATIONS.find(r => r.code === code)
|
||||
const codeInRag = isInRag(code)
|
||||
return (
|
||||
<span
|
||||
key={code}
|
||||
className={`px-3 py-1.5 rounded-full text-sm font-medium cursor-pointer ${
|
||||
codeInRag
|
||||
? 'bg-green-100 text-green-700 hover:bg-green-200'
|
||||
: 'bg-slate-100 text-slate-700 hover:bg-slate-200'
|
||||
}`}
|
||||
className="px-3 py-1.5 bg-slate-100 rounded-full text-sm font-medium text-slate-700 hover:bg-slate-200 cursor-pointer"
|
||||
onClick={() => {
|
||||
setActiveTab('regulations')
|
||||
setExpandedRegulation(code)
|
||||
}}
|
||||
title={`${reg?.fullName || code}${codeInRag ? ' (im RAG)' : ' (nicht im RAG)'}`}
|
||||
title={reg?.fullName || code}
|
||||
>
|
||||
{codeInRag ? '✓ ' : '✗ '}{code}
|
||||
{code}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
@@ -2767,13 +2406,9 @@ export default function RAGPage() {
|
||||
{intersection.regulations.map((code) => (
|
||||
<span
|
||||
key={code}
|
||||
className={`px-2 py-0.5 text-xs font-medium rounded ${
|
||||
isInRag(code)
|
||||
? 'bg-green-100 text-green-700'
|
||||
: 'bg-red-50 text-red-500'
|
||||
}`}
|
||||
className="px-2 py-0.5 text-xs font-medium bg-teal-100 text-teal-700 rounded"
|
||||
>
|
||||
{isInRag(code) ? '✓ ' : '✗ '}{code}
|
||||
{code}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
@@ -2808,15 +2443,8 @@ export default function RAGPage() {
|
||||
<tbody className="divide-y">
|
||||
{REGULATIONS.map((reg) => (
|
||||
<tr key={reg.code} className="hover:bg-slate-50">
|
||||
<td className="px-2 py-2 font-medium sticky left-0 bg-white">
|
||||
<span className="flex items-center gap-1">
|
||||
{isInRag(reg.code) ? (
|
||||
<span className="text-green-500 text-[10px]">●</span>
|
||||
) : (
|
||||
<span className="text-red-300 text-[10px]">○</span>
|
||||
)}
|
||||
<span className="text-teal-600">{reg.code}</span>
|
||||
</span>
|
||||
<td className="px-2 py-2 font-medium text-teal-600 sticky left-0 bg-white">
|
||||
{reg.code}
|
||||
</td>
|
||||
{INDUSTRIES.filter(i => i.id !== 'all').map((industry) => {
|
||||
const applies = INDUSTRY_REGULATION_MAP[industry.id]?.includes(reg.code)
|
||||
@@ -2903,32 +2531,26 @@ export default function RAGPage() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* RAG Coverage Overview */}
|
||||
{/* Integrated Regulations */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<span className="text-2xl">✅</span>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">RAG-Abdeckung ({Object.keys(REGULATIONS_IN_RAG).length} von {REGULATIONS.length} Regulierungen)</h3>
|
||||
<p className="text-sm text-slate-500">Stand: Maerz 2026 — Alle im RAG-System verfuegbaren Regulierungen (inkl. Verbraucherschutz Phase H)</p>
|
||||
<h3 className="font-semibold text-slate-900">Neu integrierte Regulierungen</h3>
|
||||
<p className="text-sm text-slate-500">Jetzt im RAG-System verfuegbar (Stand: Januar 2025)</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{REGULATIONS.filter(r => isInRag(r.code)).map((reg) => (
|
||||
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-green-100 text-green-700 rounded-full border border-green-200">
|
||||
✓ {reg.code}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
<div className="mt-4 pt-4 border-t border-slate-100">
|
||||
<p className="text-xs font-medium text-slate-500 mb-2">Noch nicht im RAG:</p>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{REGULATIONS.filter(r => !isInRag(r.code)).map((reg) => (
|
||||
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-red-50 text-red-400 rounded-full border border-red-100">
|
||||
✗ {reg.code}
|
||||
<div className="grid grid-cols-2 md:grid-cols-5 gap-3">
|
||||
{INTEGRATED_REGULATIONS.map((reg) => (
|
||||
<div key={reg.code} className="rounded-lg border border-green-200 bg-green-50 p-3 text-center">
|
||||
<span className="px-2 py-1 text-sm font-bold bg-green-100 text-green-700 rounded">
|
||||
{reg.code}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
<p className="text-xs text-slate-600 mt-2">{reg.name}</p>
|
||||
<p className="text-xs text-green-600 mt-1">Im RAG</p>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -3092,10 +2714,6 @@ export default function RAGPage() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{activeTab === 'chunks' && (
|
||||
<ChunkBrowserQA apiProxy={API_PROXY} />
|
||||
)}
|
||||
|
||||
{activeTab === 'data' && (
|
||||
<div className="space-y-6">
|
||||
{/* Upload Document */}
|
||||
@@ -3281,7 +2899,7 @@ export default function RAGPage() {
|
||||
<span className="flex items-center gap-2 text-teal-600">
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Ingestion laeuft...
|
||||
</span>
|
||||
@@ -3351,7 +2969,7 @@ export default function RAGPage() {
|
||||
{pipelineStarting ? (
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -3370,7 +2988,7 @@ export default function RAGPage() {
|
||||
{pipelineLoading ? (
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -3403,7 +3021,7 @@ export default function RAGPage() {
|
||||
<>
|
||||
<svg className="animate-spin h-5 w-5" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Startet...
|
||||
</>
|
||||
@@ -3440,7 +3058,7 @@ export default function RAGPage() {
|
||||
{pipelineState.status === 'running' && (
|
||||
<svg className="w-6 h-6 text-blue-600 animate-spin" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
)}
|
||||
{pipelineState.status === 'failed' && (
|
||||
|
||||
@@ -1,414 +0,0 @@
|
||||
/**
|
||||
* Shared RAG constants used by both page.tsx and ChunkBrowserQA.
|
||||
* REGULATIONS_IN_RAG maps regulation codes to their Qdrant collection, chunk count, and qdrant_id.
|
||||
* The qdrant_id is the actual `regulation_id` value stored in Qdrant payloads.
|
||||
* REGULATION_INFO provides minimal metadata (code, name, type) for all regulations.
|
||||
*/
|
||||
|
||||
export interface RagRegulationEntry { // Value type of each REGULATIONS_IN_RAG entry (keyed by regulation code)
|
||||
collection: string // Name of the Qdrant collection this regulation is mapped to
|
||||
chunks: number // Chunk count recorded for this regulation
|
||||
qdrant_id: string // The actual regulation_id value in Qdrant payload
|
||||
}
|
||||
|
||||
export const REGULATIONS_IN_RAG: Record<string, RagRegulationEntry> = {
|
||||
// === EU Verordnungen/Richtlinien (bp_compliance_ce) ===
|
||||
GDPR: { collection: 'bp_compliance_ce', chunks: 423, qdrant_id: 'eu_2016_679' },
|
||||
EPRIVACY: { collection: 'bp_compliance_ce', chunks: 134, qdrant_id: 'eu_2002_58' },
|
||||
SCC: { collection: 'bp_compliance_ce', chunks: 330, qdrant_id: 'eu_2021_914' },
|
||||
SCC_FULL_TEXT: { collection: 'bp_compliance_ce', chunks: 330, qdrant_id: 'eu_2021_914' },
|
||||
AIACT: { collection: 'bp_compliance_ce', chunks: 726, qdrant_id: 'eu_2024_1689' },
|
||||
CRA: { collection: 'bp_compliance_ce', chunks: 429, qdrant_id: 'eu_2024_2847' },
|
||||
NIS2: { collection: 'bp_compliance_ce', chunks: 342, qdrant_id: 'eu_2022_2555' },
|
||||
DGA: { collection: 'bp_compliance_ce', chunks: 508, qdrant_id: 'eu_2022_868' },
|
||||
DSA: { collection: 'bp_compliance_ce', chunks: 1106, qdrant_id: 'eu_2022_2065' },
|
||||
PLD: { collection: 'bp_compliance_ce', chunks: 44, qdrant_id: 'eu_1985_374' },
|
||||
E_COMMERCE_RL: { collection: 'bp_compliance_ce', chunks: 197, qdrant_id: 'eu_2000_31' },
|
||||
VERBRAUCHERRECHTE_RL: { collection: 'bp_compliance_ce', chunks: 266, qdrant_id: 'eu_2011_83' },
|
||||
DIGITALE_INHALTE_RL: { collection: 'bp_compliance_ce', chunks: 321, qdrant_id: 'eu_2019_770' },
|
||||
// Verbraucherschutz EU-Richtlinien (Phase H2 Ingestion)
|
||||
WARENKAUF_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'sgd' },
|
||||
KLAUSEL_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'uctd' },
|
||||
UNLAUTERE_PRAKTIKEN_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'ucpd' },
|
||||
PREISANGABEN_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'pid' },
|
||||
OMNIBUS_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'omn' },
|
||||
BATTERIE_VO: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'battvo' },
|
||||
DMA: { collection: 'bp_compliance_ce', chunks: 701, qdrant_id: 'eu_2022_1925' },
|
||||
DPF: { collection: 'bp_compliance_ce', chunks: 2464, qdrant_id: 'dpf' },
|
||||
EUCSA: { collection: 'bp_compliance_ce', chunks: 558, qdrant_id: 'eucsa' },
|
||||
DATAACT: { collection: 'bp_compliance_ce', chunks: 809, qdrant_id: 'dataact' },
|
||||
DORA: { collection: 'bp_compliance_ce', chunks: 823, qdrant_id: 'dora' },
|
||||
PSD2: { collection: 'bp_compliance_ce', chunks: 796, qdrant_id: 'psd2' },
|
||||
AMLR: { collection: 'bp_compliance_ce', chunks: 1182, qdrant_id: 'amlr' },
|
||||
MiCA: { collection: 'bp_compliance_ce', chunks: 1640, qdrant_id: 'mica' },
|
||||
EHDS: { collection: 'bp_compliance_ce', chunks: 1212, qdrant_id: 'ehds' },
|
||||
EAA: { collection: 'bp_compliance_ce', chunks: 433, qdrant_id: 'eaa' },
|
||||
DSM: { collection: 'bp_compliance_ce', chunks: 416, qdrant_id: 'dsm' },
|
||||
GPSR: { collection: 'bp_compliance_ce', chunks: 509, qdrant_id: 'gpsr' },
|
||||
MACHINERY_REG: { collection: 'bp_compliance_ce', chunks: 1271, qdrant_id: 'eu_2023_1230' },
|
||||
BLUE_GUIDE: { collection: 'bp_compliance_ce', chunks: 2271, qdrant_id: 'eu_blue_guide_2022' },
|
||||
EU_IFRS_DE: { collection: 'bp_compliance_ce', chunks: 34388, qdrant_id: 'eu_2023_1803' },
|
||||
EU_IFRS_EN: { collection: 'bp_compliance_ce', chunks: 34388, qdrant_id: 'eu_2023_1803' },
|
||||
// International standards in bp_compliance_ce
|
||||
NIST_SSDF: { collection: 'bp_compliance_ce', chunks: 111, qdrant_id: 'nist_sp_800_218' },
|
||||
NIST_CSF_2: { collection: 'bp_compliance_ce', chunks: 67, qdrant_id: 'nist_csf_2_0' },
|
||||
OECD_AI_PRINCIPLES: { collection: 'bp_compliance_ce', chunks: 34, qdrant_id: 'oecd_ai_principles' },
|
||||
ENISA_SECURE_BY_DESIGN: { collection: 'bp_compliance_ce', chunks: 97, qdrant_id: 'cisa_secure_by_design' },
|
||||
ENISA_SUPPLY_CHAIN: { collection: 'bp_compliance_ce', chunks: 110, qdrant_id: 'enisa_supply_chain_good_practices' },
|
||||
ENISA_THREAT_LANDSCAPE: { collection: 'bp_compliance_ce', chunks: 118, qdrant_id: 'enisa_threat_landscape_supply_chain' },
|
||||
ENISA_ICS_SCADA: { collection: 'bp_compliance_ce', chunks: 195, qdrant_id: 'enisa_ics_scada_dependencies' },
|
||||
ENISA_CYBERSECURITY_2024: { collection: 'bp_compliance_ce', chunks: 22, qdrant_id: 'enisa_cybersecurity_state_2024' },
|
||||
|
||||
// === DE Gesetze (bp_compliance_gesetze) ===
|
||||
TDDDG: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'tdddg_25' },
|
||||
TMG_KOMPLETT: { collection: 'bp_compliance_gesetze', chunks: 108, qdrant_id: 'tmg_komplett' },
|
||||
BDSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 1056, qdrant_id: 'bdsg_2018_komplett' },
|
||||
DE_DDG: { collection: 'bp_compliance_gesetze', chunks: 40, qdrant_id: 'ddg_5' },
|
||||
DE_BGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 4024, qdrant_id: 'bgb_komplett' },
|
||||
DE_EGBGB: { collection: 'bp_compliance_gesetze', chunks: 36, qdrant_id: 'egbgb_widerruf' },
|
||||
DE_HGB_RET: { collection: 'bp_compliance_gesetze', chunks: 11363, qdrant_id: 'hgb_komplett' },
|
||||
DE_AO_RET: { collection: 'bp_compliance_gesetze', chunks: 9669, qdrant_id: 'ao_komplett' },
|
||||
DE_TKG: { collection: 'bp_compliance_gesetze', chunks: 1631, qdrant_id: 'de_tkg' },
|
||||
DE_DLINFOV: { collection: 'bp_compliance_gesetze', chunks: 21, qdrant_id: 'de_dlinfov' },
|
||||
DE_BETRVG: { collection: 'bp_compliance_gesetze', chunks: 498, qdrant_id: 'de_betrvg' },
|
||||
DE_GESCHGEHG: { collection: 'bp_compliance_gesetze', chunks: 63, qdrant_id: 'de_geschgehg' },
|
||||
DE_USTG_RET: { collection: 'bp_compliance_gesetze', chunks: 1071, qdrant_id: 'de_ustg_ret' },
|
||||
DE_URHG: { collection: 'bp_compliance_gesetze', chunks: 626, qdrant_id: 'urhg_komplett' },
|
||||
|
||||
// === DE Verbraucherschutz-Gesetze (bp_compliance_gesetze) — Phase H1 (Run #701) ===
|
||||
DE_PANGV: { collection: 'bp_compliance_gesetze', chunks: 99, qdrant_id: 'pangv' },
|
||||
DE_VSBG: { collection: 'bp_compliance_gesetze', chunks: 113, qdrant_id: 'vsbg' },
|
||||
DE_PRODHAFTG: { collection: 'bp_compliance_gesetze', chunks: 26, qdrant_id: 'prodhaftg' },
|
||||
DE_VERPACKG: { collection: 'bp_compliance_gesetze', chunks: 338, qdrant_id: 'verpackg' },
|
||||
DE_ELEKTROG: { collection: 'bp_compliance_gesetze', chunks: 344, qdrant_id: 'elektrog' },
|
||||
DE_BATTDG: { collection: 'bp_compliance_gesetze', chunks: 307, qdrant_id: 'battdg' },
|
||||
DE_BFSG: { collection: 'bp_compliance_gesetze', chunks: 221, qdrant_id: 'bfsg' },
|
||||
DE_UWG: { collection: 'bp_compliance_gesetze', chunks: 157, qdrant_id: 'uwg' },
|
||||
DE_GEWO: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'gewo' }, // Pending: Re-run noetig (Timeout)
|
||||
// BGB in Teilen (statt 2.7MB komplett)
|
||||
DE_BGB_AGB_305: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_agb' }, // §§ 305-310
|
||||
DE_BGB_FERNABSATZ: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_fernabsatz' }, // §§ 312-312k
|
||||
DE_BGB_KAUFRECHT: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_kaufrecht' }, // §§ 433-480
|
||||
DE_BGB_WIDERRUF: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_widerruf' }, // §§ 355-361
|
||||
DE_BGB_DIGITAL: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_digital' }, // §§ 327-327u
|
||||
DE_EGBGB_WIDERRUF: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'egbgb' }, // Muster-Widerrufsbelehrung
|
||||
|
||||
// === BSI Standards (bp_compliance_gesetze) ===
|
||||
'BSI-TR-03161-1': { collection: 'bp_compliance_gesetze', chunks: 138, qdrant_id: 'bsi_tr_03161_1' },
|
||||
'BSI-TR-03161-2': { collection: 'bp_compliance_gesetze', chunks: 124, qdrant_id: 'bsi_tr_03161_2' },
|
||||
'BSI-TR-03161-3': { collection: 'bp_compliance_gesetze', chunks: 121, qdrant_id: 'bsi_tr_03161_3' },
|
||||
|
||||
// === AT Gesetze (bp_compliance_gesetze) ===
|
||||
AT_DSG: { collection: 'bp_compliance_gesetze', chunks: 805, qdrant_id: 'at_dsg' },
|
||||
AT_DSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 6, qdrant_id: 'at_dsg_full' },
|
||||
AT_ECG: { collection: 'bp_compliance_gesetze', chunks: 120, qdrant_id: 'at_ecg' },
|
||||
AT_TKG: { collection: 'bp_compliance_gesetze', chunks: 4348, qdrant_id: 'at_tkg' },
|
||||
AT_KSCHG: { collection: 'bp_compliance_gesetze', chunks: 402, qdrant_id: 'at_kschg' },
|
||||
AT_FAGG: { collection: 'bp_compliance_gesetze', chunks: 2, qdrant_id: 'at_fagg' },
|
||||
AT_UGB_RET: { collection: 'bp_compliance_gesetze', chunks: 2828, qdrant_id: 'at_ugb_ret' },
|
||||
AT_BAO_RET: { collection: 'bp_compliance_gesetze', chunks: 2246, qdrant_id: 'at_bao_ret' },
|
||||
AT_MEDIENG: { collection: 'bp_compliance_gesetze', chunks: 571, qdrant_id: 'at_medieng' },
|
||||
AT_ABGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 2521, qdrant_id: 'at_abgb_agb' },
|
||||
AT_UWG: { collection: 'bp_compliance_gesetze', chunks: 403, qdrant_id: 'at_uwg' },
|
||||
|
||||
// === CH Gesetze (bp_compliance_gesetze) ===
|
||||
CH_DSG: { collection: 'bp_compliance_gesetze', chunks: 180, qdrant_id: 'ch_revdsg' },
|
||||
CH_DSV: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_dsv' },
|
||||
CH_OR_AGB: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_or_agb' },
|
||||
CH_GEBUV: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_gebuv' },
|
||||
CH_ZERTES: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_zertes' },
|
||||
CH_ZGB_PERS: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_zgb_pers' },
|
||||
|
||||
// === Nationale Gesetze (andere EU) in bp_compliance_gesetze ===
|
||||
ES_LOPDGDD: { collection: 'bp_compliance_gesetze', chunks: 782, qdrant_id: 'es_lopdgdd' },
|
||||
IT_CODICE_PRIVACY: { collection: 'bp_compliance_gesetze', chunks: 59, qdrant_id: 'it_codice_privacy' },
|
||||
NL_UAVG: { collection: 'bp_compliance_gesetze', chunks: 523, qdrant_id: 'nl_uavg' },
|
||||
FR_CNIL_GUIDE: { collection: 'bp_compliance_gesetze', chunks: 562, qdrant_id: 'fr_loi_informatique' },
|
||||
IE_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 64, qdrant_id: 'ie_dpa_2018' },
|
||||
UK_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 156, qdrant_id: 'uk_dpa_2018' },
|
||||
UK_GDPR: { collection: 'bp_compliance_gesetze', chunks: 45, qdrant_id: 'uk_gdpr' },
|
||||
NO_PERSONOPPLYSNINGSLOVEN: { collection: 'bp_compliance_gesetze', chunks: 41, qdrant_id: 'no_pol' },
|
||||
SE_DATASKYDDSLAG: { collection: 'bp_compliance_gesetze', chunks: 56, qdrant_id: 'se_dataskyddslag' },
|
||||
PL_UODO: { collection: 'bp_compliance_gesetze', chunks: 39, qdrant_id: 'pl_ustawa' },
|
||||
CZ_ZOU: { collection: 'bp_compliance_gesetze', chunks: 238, qdrant_id: 'cz_zakon' },
|
||||
HU_INFOTV: { collection: 'bp_compliance_gesetze', chunks: 747, qdrant_id: 'hu_info_tv' },
|
||||
LU_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 2, qdrant_id: 'lu_dpa_law' },
|
||||
|
||||
// === EDPB Guidelines (bp_compliance_datenschutz) — alt (ingest-legal-corpus.sh) ===
|
||||
EDPB_GUIDELINES_5_2020: { collection: 'bp_compliance_datenschutz', chunks: 236, qdrant_id: 'edpb_05_2020' },
|
||||
EDPB_GUIDELINES_7_2020: { collection: 'bp_compliance_datenschutz', chunks: 347, qdrant_id: 'edpb_guidelines_7_2020' },
|
||||
EDPB_GUIDELINES_1_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_01_2020' },
|
||||
EDPB_GUIDELINES_1_2022: { collection: 'bp_compliance_datenschutz', chunks: 510, qdrant_id: 'edpb_01_2022' },
|
||||
EDPB_GUIDELINES_2_2023: { collection: 'bp_compliance_datenschutz', chunks: 94, qdrant_id: 'edpb_02_2023' },
|
||||
EDPB_GUIDELINES_2_2024: { collection: 'bp_compliance_datenschutz', chunks: 79, qdrant_id: 'edpb_02_2024' },
|
||||
EDPB_GUIDELINES_4_2019: { collection: 'bp_compliance_datenschutz', chunks: 202, qdrant_id: 'edpb_04_2019' },
|
||||
EDPB_GUIDELINES_9_2022: { collection: 'bp_compliance_datenschutz', chunks: 243, qdrant_id: 'edpb_09_2022' },
|
||||
EDPB_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 29, qdrant_id: 'edpb_dpia_list' },
|
||||
EDPB_LEGITIMATE_INTEREST: { collection: 'bp_compliance_datenschutz', chunks: 672, qdrant_id: 'edpb_legitimate_interest' },
|
||||
EDPS_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 73, qdrant_id: 'edps_dpia_list' },
|
||||
|
||||
// === EDPB Guidelines (bp_compliance_datenschutz) — neu (edpb-crawler.py) ===
|
||||
EDPB_ACCESS_01_2022: { collection: 'bp_compliance_datenschutz', chunks: 1020, qdrant_id: 'edpb_access_01_2022' },
|
||||
EDPB_ARTICLE48_02_2024: { collection: 'bp_compliance_datenschutz', chunks: 158, qdrant_id: 'edpb_article48_02_2024' },
|
||||
EDPB_BCR_01_2022: { collection: 'bp_compliance_datenschutz', chunks: 384, qdrant_id: 'edpb_bcr_01_2022' },
|
||||
EDPB_BREACH_09_2022: { collection: 'bp_compliance_datenschutz', chunks: 486, qdrant_id: 'edpb_breach_09_2022' },
|
||||
EDPB_CERTIFICATION_01_2018: { collection: 'bp_compliance_datenschutz', chunks: 160, qdrant_id: 'edpb_certification_01_2018' },
|
||||
EDPB_CERTIFICATION_01_2019: { collection: 'bp_compliance_datenschutz', chunks: 160, qdrant_id: 'edpb_certification_01_2019' },
|
||||
EDPB_CONNECTED_VEHICLES_01_2020: { collection: 'bp_compliance_datenschutz', chunks: 482, qdrant_id: 'edpb_connected_vehicles_01_2020' },
|
||||
EDPB_CONSENT_05_2020: { collection: 'bp_compliance_datenschutz', chunks: 247, qdrant_id: 'edpb_consent_05_2020' },
|
||||
EDPB_CONTROLLER_PROCESSOR_07_2020: { collection: 'bp_compliance_datenschutz', chunks: 694, qdrant_id: 'edpb_controller_processor_07_2020' },
|
||||
EDPB_COOKIE_TASKFORCE_2023: { collection: 'bp_compliance_datenschutz', chunks: 78, qdrant_id: 'edpb_cookie_taskforce_2023' },
|
||||
EDPB_DARK_PATTERNS_03_2022: { collection: 'bp_compliance_datenschutz', chunks: 413, qdrant_id: 'edpb_dark_patterns_03_2022' },
|
||||
EDPB_DPBD_04_2019: { collection: 'bp_compliance_datenschutz', chunks: 216, qdrant_id: 'edpb_dpbd_04_2019' },
|
||||
EDPB_DPIA_LIST_RECOMMENDATION: { collection: 'bp_compliance_datenschutz', chunks: 31, qdrant_id: 'edpb_dpia_list_recommendation' },
|
||||
EDPB_EPRIVACY_02_2023: { collection: 'bp_compliance_datenschutz', chunks: 188, qdrant_id: 'edpb_eprivacy_02_2023' },
|
||||
EDPB_FACIAL_RECOGNITION_05_2022: { collection: 'bp_compliance_datenschutz', chunks: 396, qdrant_id: 'edpb_facial_recognition_05_2022' },
|
||||
EDPB_FINES_04_2022: { collection: 'bp_compliance_datenschutz', chunks: 346, qdrant_id: 'edpb_fines_04_2022' },
|
||||
EDPB_GEOLOCATION_04_2020: { collection: 'bp_compliance_datenschutz', chunks: 108, qdrant_id: 'edpb_geolocation_04_2020' },
|
||||
EDPB_GL_2_2019: { collection: 'bp_compliance_datenschutz', chunks: 107, qdrant_id: 'edpb_gl_2_2019' },
|
||||
EDPB_HEALTH_DATA_03_2020: { collection: 'bp_compliance_datenschutz', chunks: 182, qdrant_id: 'edpb_health_data_03_2020' },
|
||||
EDPB_LEGAL_BASIS_02_2019: { collection: 'bp_compliance_datenschutz', chunks: 107, qdrant_id: 'edpb_legal_basis_02_2019' },
|
||||
EDPB_LEGITIMATE_INTEREST_01_2024: { collection: 'bp_compliance_datenschutz', chunks: 336, qdrant_id: 'edpb_legitimate_interest_01_2024' },
|
||||
EDPB_RTBF_05_2019: { collection: 'bp_compliance_datenschutz', chunks: 111, qdrant_id: 'edpb_rtbf_05_2019' },
|
||||
EDPB_RRO_09_2020: { collection: 'bp_compliance_datenschutz', chunks: 82, qdrant_id: 'edpb_rro_09_2020' },
|
||||
EDPB_SOCIAL_MEDIA_08_2020: { collection: 'bp_compliance_datenschutz', chunks: 333, qdrant_id: 'edpb_social_media_08_2020' },
|
||||
EDPB_TRANSFERS_01_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_transfers_01_2020' },
|
||||
EDPB_TRANSFERS_07_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_transfers_07_2020' },
|
||||
EDPB_VIDEO_03_2019: { collection: 'bp_compliance_datenschutz', chunks: 204, qdrant_id: 'edpb_video_03_2019' },
|
||||
EDPB_VVA_02_2021: { collection: 'bp_compliance_datenschutz', chunks: 273, qdrant_id: 'edpb_vva_02_2021' },
|
||||
|
||||
// === EDPS Guidance (bp_compliance_datenschutz) ===
|
||||
EDPS_DIGITAL_ETHICS_2018: { collection: 'bp_compliance_datenschutz', chunks: 404, qdrant_id: 'edps_digital_ethics_2018' },
|
||||
EDPS_GENAI_ORIENTATIONS_2024: { collection: 'bp_compliance_datenschutz', chunks: 274, qdrant_id: 'edps_genai_orientations_2024' },
|
||||
|
||||
// === WP29 Endorsed (bp_compliance_datenschutz) ===
|
||||
WP242_PORTABILITY: { collection: 'bp_compliance_datenschutz', chunks: 141, qdrant_id: 'wp242_portability' },
|
||||
WP243_DPO: { collection: 'bp_compliance_datenschutz', chunks: 54, qdrant_id: 'wp243_dpo' },
|
||||
WP244_PROFILING: { collection: 'bp_compliance_datenschutz', chunks: 247, qdrant_id: 'wp244_profiling' },
|
||||
WP248_DPIA: { collection: 'bp_compliance_datenschutz', chunks: 288, qdrant_id: 'wp248_dpia' },
|
||||
WP250_BREACH: { collection: 'bp_compliance_datenschutz', chunks: 201, qdrant_id: 'wp250_breach' },
|
||||
WP259_CONSENT: { collection: 'bp_compliance_datenschutz', chunks: 496, qdrant_id: 'wp259_consent' },
|
||||
WP260_TRANSPARENCY: { collection: 'bp_compliance_datenschutz', chunks: 558, qdrant_id: 'wp260_transparency' },
|
||||
|
||||
// === DSFA Muss-Listen (bp_dsfa_corpus) ===
|
||||
DSFA_BFDI_BUND: { collection: 'bp_dsfa_corpus', chunks: 17, qdrant_id: 'dsfa_bfdi_bund' },
|
||||
DSFA_DSK_GEMEINSAM: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_dsk_gemeinsam' },
|
||||
DSFA_BW: { collection: 'bp_dsfa_corpus', chunks: 41, qdrant_id: 'dsfa_bw' },
|
||||
DSFA_BY: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_by' },
|
||||
DSFA_BE_OE: { collection: 'bp_dsfa_corpus', chunks: 31, qdrant_id: 'dsfa_be_oe' },
|
||||
DSFA_BE_NOE: { collection: 'bp_dsfa_corpus', chunks: 48, qdrant_id: 'dsfa_be_noe' },
|
||||
DSFA_BB_OE: { collection: 'bp_dsfa_corpus', chunks: 43, qdrant_id: 'dsfa_bb_oe' },
|
||||
DSFA_BB_NOE: { collection: 'bp_dsfa_corpus', chunks: 53, qdrant_id: 'dsfa_bb_noe' },
|
||||
DSFA_HB: { collection: 'bp_dsfa_corpus', chunks: 44, qdrant_id: 'dsfa_hb' },
|
||||
DSFA_HH_OE: { collection: 'bp_dsfa_corpus', chunks: 58, qdrant_id: 'dsfa_hh_oe' },
|
||||
DSFA_HH_NOE: { collection: 'bp_dsfa_corpus', chunks: 53, qdrant_id: 'dsfa_hh_noe' },
|
||||
DSFA_MV: { collection: 'bp_dsfa_corpus', chunks: 32, qdrant_id: 'dsfa_mv' },
|
||||
DSFA_NI: { collection: 'bp_dsfa_corpus', chunks: 47, qdrant_id: 'dsfa_ni' },
|
||||
DSFA_RP: { collection: 'bp_dsfa_corpus', chunks: 25, qdrant_id: 'dsfa_rp' },
|
||||
DSFA_SL: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_sl' },
|
||||
DSFA_SN: { collection: 'bp_dsfa_corpus', chunks: 18, qdrant_id: 'dsfa_sn' },
|
||||
DSFA_ST_OE: { collection: 'bp_dsfa_corpus', chunks: 57, qdrant_id: 'dsfa_st_oe' },
|
||||
DSFA_ST_NOE: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_st_noe' },
|
||||
DSFA_SH: { collection: 'bp_dsfa_corpus', chunks: 44, qdrant_id: 'dsfa_sh' },
|
||||
DSFA_TH: { collection: 'bp_dsfa_corpus', chunks: 48, qdrant_id: 'dsfa_th' },
|
||||
}
|
||||
|
||||
/**
 * Compact regulation descriptor used for sidebar display.
 *
 * Only the three fields below are carried here; the full REGULATIONS array
 * (including descriptions) remains in page.tsx.
 */
export interface RegulationInfo {
  /** Identifier of the regulation, e.g. 'GDPR'. */
  code: string
  /** Short display name, e.g. 'DSGVO'. */
  name: string
  /** Category tag, e.g. 'eu_regulation' or 'de_law'. */
  type: string
}
|
||||
|
||||
/**
 * Sidebar regulation list.
 *
 * Kept as a compact [code, name, type] table and expanded into
 * RegulationInfo objects in one pass; entry order is the display order.
 */
export const REGULATION_INFO: RegulationInfo[] = (
  [
    // EU regulations and directives
    ['GDPR', 'DSGVO', 'eu_regulation'],
    ['EPRIVACY', 'ePrivacy-Richtlinie', 'eu_directive'],
    ['SCC', 'Standardvertragsklauseln', 'eu_regulation'],
    ['SCC_FULL_TEXT', 'SCC Volltext', 'eu_regulation'],
    ['DPF', 'EU-US Data Privacy Framework', 'eu_regulation'],
    ['AIACT', 'EU AI Act', 'eu_regulation'],
    ['CRA', 'Cyber Resilience Act', 'eu_regulation'],
    ['NIS2', 'NIS2-Richtlinie', 'eu_directive'],
    ['EUCSA', 'EU Cybersecurity Act', 'eu_regulation'],
    ['DATAACT', 'Data Act', 'eu_regulation'],
    ['DGA', 'Data Governance Act', 'eu_regulation'],
    ['DSA', 'Digital Services Act', 'eu_regulation'],
    ['DMA', 'Digital Markets Act', 'eu_regulation'],
    ['EAA', 'European Accessibility Act', 'eu_directive'],
    ['DSM', 'DSM-Urheberrechtsrichtlinie', 'eu_directive'],
    ['PLD', 'Produkthaftungsrichtlinie', 'eu_directive'],
    ['GPSR', 'General Product Safety', 'eu_regulation'],
    ['WARENKAUF_RL', 'Warenkauf-RL', 'eu_directive'],
    ['KLAUSEL_RL', 'Klausel-RL', 'eu_directive'],
    ['UNLAUTERE_PRAKTIKEN_RL', 'UGP-RL', 'eu_directive'],
    ['PREISANGABEN_RL', 'Preisangaben-RL', 'eu_directive'],
    ['OMNIBUS_RL', 'Omnibus-RL', 'eu_directive'],
    ['BATTERIE_VO', 'Batterieverordnung', 'eu_regulation'],
    ['E_COMMERCE_RL', 'E-Commerce-Richtlinie', 'eu_directive'],
    ['VERBRAUCHERRECHTE_RL', 'Verbraucherrechte-RL', 'eu_directive'],
    ['DIGITALE_INHALTE_RL', 'Digitale-Inhalte-RL', 'eu_directive'],
    // Financial
    ['DORA', 'DORA', 'eu_regulation'],
    ['PSD2', 'PSD2', 'eu_directive'],
    ['AMLR', 'AML-Verordnung', 'eu_regulation'],
    ['MiCA', 'MiCA', 'eu_regulation'],
    ['EHDS', 'EHDS', 'eu_regulation'],
    ['MACHINERY_REG', 'Maschinenverordnung', 'eu_regulation'],
    ['BLUE_GUIDE', 'Blue Guide', 'eu_regulation'],
    ['EU_IFRS_DE', 'EU-IFRS (DE)', 'eu_regulation'],
    ['EU_IFRS_EN', 'EU-IFRS (EN)', 'eu_regulation'],
    // German laws
    ['TDDDG', 'TDDDG', 'de_law'],
    ['TMG_KOMPLETT', 'TMG', 'de_law'],
    ['BDSG_FULL', 'BDSG', 'de_law'],
    ['DE_DDG', 'DDG', 'de_law'],
    ['DE_BGB_AGB', 'BGB/AGB', 'de_law'],
    ['DE_EGBGB', 'EGBGB', 'de_law'],
    ['DE_HGB_RET', 'HGB', 'de_law'],
    ['DE_AO_RET', 'AO', 'de_law'],
    ['DE_TKG', 'TKG', 'de_law'],
    ['DE_DLINFOV', 'DL-InfoV', 'de_law'],
    ['DE_BETRVG', 'BetrVG', 'de_law'],
    ['DE_GESCHGEHG', 'GeschGehG', 'de_law'],
    ['DE_USTG_RET', 'UStG', 'de_law'],
    ['DE_URHG', 'UrhG', 'de_law'],
    // German consumer protection
    ['DE_PANGV', 'PAngV', 'de_law'],
    ['DE_VSBG', 'VSBG', 'de_law'],
    ['DE_PRODHAFTG', 'ProdHaftG', 'de_law'],
    ['DE_VERPACKG', 'VerpackG', 'de_law'],
    ['DE_ELEKTROG', 'ElektroG', 'de_law'],
    ['DE_BATTDG', 'BattDG', 'de_law'],
    ['DE_BFSG', 'BFSG', 'de_law'],
    ['DE_UWG', 'UWG', 'de_law'],
    ['DE_GEWO', 'GewO', 'de_law'],
    ['DE_BGB_AGB_305', 'BGB AGB-Recht §§305-310', 'de_law'],
    ['DE_BGB_FERNABSATZ', 'BGB Fernabsatz §§312-312k', 'de_law'],
    ['DE_BGB_KAUFRECHT', 'BGB Kaufrecht §§433-480', 'de_law'],
    ['DE_BGB_WIDERRUF', 'BGB Widerruf §§355-361', 'de_law'],
    ['DE_BGB_DIGITAL', 'BGB Digital §§327-327u', 'de_law'],
    ['DE_EGBGB_WIDERRUF', 'EGBGB Widerrufsbelehrung', 'de_law'],
    // BSI
    ['BSI-TR-03161-1', 'BSI-TR Teil 1', 'bsi_standard'],
    ['BSI-TR-03161-2', 'BSI-TR Teil 2', 'bsi_standard'],
    ['BSI-TR-03161-3', 'BSI-TR Teil 3', 'bsi_standard'],
    // Austria
    ['AT_DSG', 'DSG Oesterreich', 'at_law'],
    ['AT_DSG_FULL', 'DSG Volltext', 'at_law'],
    ['AT_ECG', 'ECG', 'at_law'],
    ['AT_TKG', 'TKG AT', 'at_law'],
    ['AT_KSCHG', 'KSchG', 'at_law'],
    ['AT_FAGG', 'FAGG', 'at_law'],
    ['AT_UGB_RET', 'UGB', 'at_law'],
    ['AT_BAO_RET', 'BAO', 'at_law'],
    ['AT_MEDIENG', 'MedienG', 'at_law'],
    ['AT_ABGB_AGB', 'ABGB/AGB', 'at_law'],
    ['AT_UWG', 'UWG AT', 'at_law'],
    // Switzerland
    ['CH_DSG', 'DSG Schweiz', 'ch_law'],
    ['CH_DSV', 'DSV', 'ch_law'],
    ['CH_OR_AGB', 'OR/AGB', 'ch_law'],
    ['CH_GEBUV', 'GeBuV', 'ch_law'],
    ['CH_ZERTES', 'ZertES', 'ch_law'],
    ['CH_ZGB_PERS', 'ZGB', 'ch_law'],
    // Other national laws
    ['ES_LOPDGDD', 'LOPDGDD Spanien', 'national_law'],
    ['IT_CODICE_PRIVACY', 'Codice Privacy Italien', 'national_law'],
    ['NL_UAVG', 'UAVG Niederlande', 'national_law'],
    ['FR_CNIL_GUIDE', 'CNIL Guide RGPD', 'national_law'],
    ['IE_DPA_2018', 'DPA 2018 Ireland', 'national_law'],
    ['UK_DPA_2018', 'DPA 2018 UK', 'national_law'],
    ['UK_GDPR', 'UK GDPR', 'national_law'],
    ['NO_PERSONOPPLYSNINGSLOVEN', 'Personopplysningsloven', 'national_law'],
    ['SE_DATASKYDDSLAG', 'Dataskyddslag Schweden', 'national_law'],
    ['PL_UODO', 'UODO Polen', 'national_law'],
    ['CZ_ZOU', 'Zakon Tschechien', 'national_law'],
    ['HU_INFOTV', 'Infotv. Ungarn', 'national_law'],
    ['LU_DPA_LAW', 'Datenschutzgesetz Luxemburg', 'national_law'],
    // EDPB guidelines (old)
    ['EDPB_GUIDELINES_5_2020', 'EDPB GL Einwilligung', 'eu_guideline'],
    ['EDPB_GUIDELINES_7_2020', 'EDPB GL C/P Konzepte', 'eu_guideline'],
    ['EDPB_GUIDELINES_1_2020', 'EDPB GL Fahrzeuge', 'eu_guideline'],
    ['EDPB_GUIDELINES_1_2022', 'EDPB GL Bussgelder', 'eu_guideline'],
    ['EDPB_GUIDELINES_2_2023', 'EDPB GL Art. 37 Scope', 'eu_guideline'],
    ['EDPB_GUIDELINES_2_2024', 'EDPB GL Art. 48', 'eu_guideline'],
    ['EDPB_GUIDELINES_4_2019', 'EDPB GL Art. 25 DPbD', 'eu_guideline'],
    ['EDPB_GUIDELINES_9_2022', 'EDPB GL Datenschutzverletzung', 'eu_guideline'],
    ['EDPB_DPIA_LIST', 'EDPB DPIA-Liste', 'eu_guideline'],
    ['EDPB_LEGITIMATE_INTEREST', 'EDPB Berecht. Interesse', 'eu_guideline'],
    ['EDPS_DPIA_LIST', 'EDPS DPIA-Liste', 'eu_guideline'],
    // EDPB guidelines (new, crawler)
    ['EDPB_ACCESS_01_2022', 'EDPB GL Auskunftsrecht', 'eu_guideline'],
    ['EDPB_ARTICLE48_02_2024', 'EDPB GL Art. 48', 'eu_guideline'],
    ['EDPB_BCR_01_2022', 'EDPB GL BCR', 'eu_guideline'],
    ['EDPB_BREACH_09_2022', 'EDPB GL Datenpannen', 'eu_guideline'],
    ['EDPB_CERTIFICATION_01_2018', 'EDPB GL Zertifizierung', 'eu_guideline'],
    ['EDPB_CERTIFICATION_01_2019', 'EDPB GL Zertifizierung 2019', 'eu_guideline'],
    ['EDPB_CONNECTED_VEHICLES_01_2020', 'EDPB GL Vernetzte Fahrzeuge', 'eu_guideline'],
    ['EDPB_CONSENT_05_2020', 'EDPB GL Consent', 'eu_guideline'],
    ['EDPB_CONTROLLER_PROCESSOR_07_2020', 'EDPB GL Verantwortliche/Auftragsverarbeiter', 'eu_guideline'],
    ['EDPB_COOKIE_TASKFORCE_2023', 'EDPB Cookie-Banner Taskforce', 'eu_guideline'],
    ['EDPB_DARK_PATTERNS_03_2022', 'EDPB GL Dark Patterns', 'eu_guideline'],
    ['EDPB_DPBD_04_2019', 'EDPB GL Data Protection by Design', 'eu_guideline'],
    ['EDPB_DPIA_LIST_RECOMMENDATION', 'EDPB DPIA-Empfehlung', 'eu_guideline'],
    ['EDPB_EPRIVACY_02_2023', 'EDPB GL ePrivacy', 'eu_guideline'],
    ['EDPB_FACIAL_RECOGNITION_05_2022', 'EDPB GL Gesichtserkennung', 'eu_guideline'],
    ['EDPB_FINES_04_2022', 'EDPB GL Bussgeldberechnung', 'eu_guideline'],
    ['EDPB_GEOLOCATION_04_2020', 'EDPB GL Geolokalisierung', 'eu_guideline'],
    ['EDPB_GL_2_2019', 'EDPB GL Video-Ueberwachung', 'eu_guideline'],
    ['EDPB_HEALTH_DATA_03_2020', 'EDPB GL Gesundheitsdaten', 'eu_guideline'],
    ['EDPB_LEGAL_BASIS_02_2019', 'EDPB GL Rechtsgrundlage Art. 6(1)(b)', 'eu_guideline'],
    ['EDPB_LEGITIMATE_INTEREST_01_2024', 'EDPB GL Berecht. Interesse 2024', 'eu_guideline'],
    ['EDPB_RTBF_05_2019', 'EDPB GL Recht auf Vergessenwerden', 'eu_guideline'],
    ['EDPB_RRO_09_2020', 'EDPB GL Relevant & Reasoned Objection', 'eu_guideline'],
    ['EDPB_SOCIAL_MEDIA_08_2020', 'EDPB GL Social Media Targeting', 'eu_guideline'],
    ['EDPB_TRANSFERS_01_2020', 'EDPB GL Uebermittlungen Art. 49', 'eu_guideline'],
    ['EDPB_TRANSFERS_07_2020', 'EDPB GL Drittlandtransfers', 'eu_guideline'],
    ['EDPB_VIDEO_03_2019', 'EDPB GL Videoueberwachung', 'eu_guideline'],
    ['EDPB_VVA_02_2021', 'EDPB GL Virtuelle Sprachassistenten', 'eu_guideline'],
    // EDPS
    ['EDPS_DIGITAL_ETHICS_2018', 'EDPS Digitale Ethik', 'eu_guideline'],
    ['EDPS_GENAI_ORIENTATIONS_2024', 'EDPS GenAI Orientierungen', 'eu_guideline'],
    // WP29 endorsed
    ['WP242_PORTABILITY', 'WP242 Datenportabilitaet', 'wp29_endorsed'],
    ['WP243_DPO', 'WP243 Datenschutzbeauftragter', 'wp29_endorsed'],
    ['WP244_PROFILING', 'WP244 Profiling', 'wp29_endorsed'],
    ['WP248_DPIA', 'WP248 DSFA', 'wp29_endorsed'],
    ['WP250_BREACH', 'WP250 Datenpannen', 'wp29_endorsed'],
    ['WP259_CONSENT', 'WP259 Einwilligung', 'wp29_endorsed'],
    ['WP260_TRANSPARENCY', 'WP260 Transparenz', 'wp29_endorsed'],
    // DSFA mandatory lists ("Muss-Listen")
    ['DSFA_BFDI_BUND', 'DSFA BfDI Bund', 'dsfa_mussliste'],
    ['DSFA_DSK_GEMEINSAM', 'DSFA DSK Gemeinsam', 'dsfa_mussliste'],
    ['DSFA_BW', 'DSFA Baden-Wuerttemberg', 'dsfa_mussliste'],
    ['DSFA_BY', 'DSFA Bayern', 'dsfa_mussliste'],
    ['DSFA_BE_OE', 'DSFA Berlin oeffentlich', 'dsfa_mussliste'],
    ['DSFA_BE_NOE', 'DSFA Berlin nicht-oeffentlich', 'dsfa_mussliste'],
    ['DSFA_BB_OE', 'DSFA Brandenburg oeffentlich', 'dsfa_mussliste'],
    ['DSFA_BB_NOE', 'DSFA Brandenburg nicht-oeffentlich', 'dsfa_mussliste'],
    ['DSFA_HB', 'DSFA Bremen', 'dsfa_mussliste'],
    ['DSFA_HH_OE', 'DSFA Hamburg oeffentlich', 'dsfa_mussliste'],
    ['DSFA_HH_NOE', 'DSFA Hamburg nicht-oeffentlich', 'dsfa_mussliste'],
    ['DSFA_MV', 'DSFA Mecklenburg-Vorpommern', 'dsfa_mussliste'],
    ['DSFA_NI', 'DSFA Niedersachsen', 'dsfa_mussliste'],
    ['DSFA_RP', 'DSFA Rheinland-Pfalz', 'dsfa_mussliste'],
    ['DSFA_SL', 'DSFA Saarland', 'dsfa_mussliste'],
    ['DSFA_SN', 'DSFA Sachsen', 'dsfa_mussliste'],
    ['DSFA_ST_OE', 'DSFA Sachsen-Anhalt oeffentlich', 'dsfa_mussliste'],
    ['DSFA_ST_NOE', 'DSFA Sachsen-Anhalt nicht-oeffentlich', 'dsfa_mussliste'],
    ['DSFA_SH', 'DSFA Schleswig-Holstein', 'dsfa_mussliste'],
    ['DSFA_TH', 'DSFA Thueringen', 'dsfa_mussliste'],
    // International standards
    ['NIST_SSDF', 'NIST SSDF', 'international_standard'],
    ['NIST_CSF_2', 'NIST CSF 2.0', 'international_standard'],
    ['OECD_AI_PRINCIPLES', 'OECD AI Principles', 'international_standard'],
    // NOTE(review): code says ENISA while the display name says CISA — confirm this is intentional.
    ['ENISA_SECURE_BY_DESIGN', 'CISA Secure by Design', 'international_standard'],
    ['ENISA_SUPPLY_CHAIN', 'ENISA Supply Chain', 'international_standard'],
    ['ENISA_THREAT_LANDSCAPE', 'ENISA Threat Landscape', 'international_standard'],
    ['ENISA_ICS_SCADA', 'ENISA ICS/SCADA', 'international_standard'],
    ['ENISA_CYBERSECURITY_2024', 'ENISA Cybersecurity 2024', 'international_standard'],
  ] as const
).map(([code, name, type]) => ({ code, name, type }))
|
||||
@@ -1430,6 +1430,7 @@ export default function TestQualityPage() {
|
||||
databases: ['Qdrant', 'PostgreSQL'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'Provider-Vergleich' },
|
||||
{ name: 'GPU Infrastruktur', href: '/ai/gpu', description: 'GPU-Ressourcen verwalten' },
|
||||
{ name: 'RAG Management', href: '/ai/rag', description: 'Training Data & RAG Pipelines' },
|
||||
]}
|
||||
|
||||
@@ -141,6 +141,7 @@ export default function VoiceMatrixPage() {
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'Matrix & Jitsi', href: '/communication/matrix', description: 'Kommunikation Monitoring' },
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider vergleichen' },
|
||||
{ name: 'GPU Infrastruktur', href: '/infrastructure/gpu', description: 'GPU fuer Voice-Service' },
|
||||
]}
|
||||
collapsible={true}
|
||||
|
||||
@@ -24,6 +24,7 @@ export default function DevelopmentPage() {
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'GPU Infrastruktur', href: '/infrastructure/gpu', description: 'GPU fuer Voice/Game' },
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'LLM fuer Voice/Game' },
|
||||
]}
|
||||
collapsible={true}
|
||||
defaultCollapsed={false}
|
||||
|
||||
@@ -149,6 +149,7 @@ const ADMIN_SCREENS: ScreenDefinition[] = [
|
||||
{ id: 'admin-obligations', name: 'Pflichten', description: 'NIS2, DSGVO, AI Act', category: 'sdk', icon: '⚡', url: '/sdk/obligations' },
|
||||
|
||||
// === KI & AUTOMATISIERUNG (Teal #14b8a6) ===
|
||||
{ id: 'admin-llm-compare', name: 'LLM Vergleich', description: 'KI-Provider Vergleich', category: 'ai', icon: '🤖', url: '/ai/llm-compare' },
|
||||
{ id: 'admin-rag', name: 'Daten & RAG', description: 'Training Data & RAG', category: 'ai', icon: '🗄️', url: '/ai/rag' },
|
||||
{ id: 'admin-ocr-labeling', name: 'OCR-Labeling', description: 'Handschrift-Training', category: 'ai', icon: '✍️', url: '/ai/ocr-labeling' },
|
||||
{ id: 'admin-magic-help', name: 'Magic Help', description: 'TrOCR Handschrift-OCR', category: 'ai', icon: '🪄', url: '/ai/magic-help' },
|
||||
@@ -195,6 +196,7 @@ const ADMIN_CONNECTIONS: ConnectionDef[] = [
|
||||
{ source: 'admin-dashboard', target: 'admin-backlog', label: 'Go-Live' },
|
||||
{ source: 'admin-dashboard', target: 'admin-compliance-hub', label: 'Compliance' },
|
||||
{ source: 'admin-onboarding', target: 'admin-consent' },
|
||||
{ source: 'admin-onboarding', target: 'admin-llm-compare' },
|
||||
{ source: 'admin-rbac', target: 'admin-consent' },
|
||||
|
||||
// === DSGVO FLOW ===
|
||||
@@ -222,6 +224,7 @@ const ADMIN_CONNECTIONS: ConnectionDef[] = [
|
||||
{ source: 'admin-dsms', target: 'admin-compliance-workflow' },
|
||||
|
||||
// === KI & AUTOMATISIERUNG FLOW ===
|
||||
{ source: 'admin-llm-compare', target: 'admin-rag', label: 'Daten' },
|
||||
{ source: 'admin-rag', target: 'admin-quality' },
|
||||
{ source: 'admin-rag', target: 'admin-agents' },
|
||||
{ source: 'admin-ocr-labeling', target: 'admin-magic-help', label: 'Training' },
|
||||
|
||||
665
admin-lehrer/app/(admin)/development/workflow/page.tsx
Normal file
665
admin-lehrer/app/(admin)/development/workflow/page.tsx
Normal file
@@ -0,0 +1,665 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect } from 'react'
|
||||
import {
|
||||
GitBranch,
|
||||
Terminal,
|
||||
Server,
|
||||
Database,
|
||||
CheckCircle2,
|
||||
ArrowRight,
|
||||
Laptop,
|
||||
HardDrive,
|
||||
RefreshCw,
|
||||
Clock,
|
||||
Shield,
|
||||
Users,
|
||||
FileCode,
|
||||
Play,
|
||||
Eye,
|
||||
Download,
|
||||
AlertTriangle,
|
||||
Info,
|
||||
Container
|
||||
} from 'lucide-react'
|
||||
|
||||
/** A single entry of the development-workflow step list. */
interface WorkflowStep {
  /** Numeric identifier of the step. */
  id: number
  /** Short heading of the step. */
  title: string
  /** Longer explanatory text for the step. */
  description: string
  /** Optional command line or URL associated with the step. */
  command?: string
  /** Rendered icon element for the step. */
  icon: React.ReactNode
  /** Machine the step is attributed to. */
  location: 'macbook' | 'macmini'
}
|
||||
|
||||
/** Backup status record. */
interface BackupInfo {
  /** Time of the last backup run, or null when none is recorded. */
  lastRun: string | null
  /** Display text for the next scheduled run. */
  nextRun: string
  /** Health indicator of the backup. */
  status: 'ok' | 'warning' | 'error'
}
|
||||
|
||||
export default function WorkflowPage() {
|
||||
const [activeStep, setActiveStep] = useState<number>(1)
|
||||
const [backupInfo, setBackupInfo] = useState<BackupInfo>({
|
||||
lastRun: null,
|
||||
nextRun: '02:00 Uhr',
|
||||
status: 'ok'
|
||||
})
|
||||
|
||||
const workflowSteps: WorkflowStep[] = [
|
||||
{
|
||||
id: 1,
|
||||
title: 'Code bearbeiten',
|
||||
description: 'Arbeite mit Claude Code im Terminal. Beschreibe was du brauchst und Claude schreibt den Code.',
|
||||
command: 'claude',
|
||||
icon: <Terminal className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
},
|
||||
{
|
||||
id: 2,
|
||||
title: 'Änderungen stagen',
|
||||
description: 'Füge die geänderten Dateien zum nächsten Commit hinzu.',
|
||||
command: 'git add <dateien>',
|
||||
icon: <FileCode className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
},
|
||||
{
|
||||
id: 3,
|
||||
title: 'Commit erstellen',
|
||||
description: 'Erstelle einen Commit mit einer aussagekräftigen Nachricht.',
|
||||
command: 'git commit -m "feat: neue Funktion"',
|
||||
icon: <GitBranch className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
},
|
||||
{
|
||||
id: 4,
|
||||
title: 'Push zum Server',
|
||||
description: 'Sende die Änderungen an den Mac Mini. Dies startet automatisch die CI/CD Pipeline.',
|
||||
command: 'git push origin main',
|
||||
icon: <ArrowRight className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
},
|
||||
{
|
||||
id: 5,
|
||||
title: 'CI/CD Pipeline',
|
||||
description: 'Woodpecker führt automatisch Tests aus und baut die Container.',
|
||||
command: '(automatisch)',
|
||||
icon: <RefreshCw className="h-6 w-6" />,
|
||||
location: 'macmini'
|
||||
},
|
||||
{
|
||||
id: 6,
|
||||
title: 'Integration Tests',
|
||||
description: 'Docker Compose Test-Umgebung mit Backend, DB und Consent-Service fuer vollstaendige E2E-Tests.',
|
||||
command: 'docker compose -f docker-compose.test.yml up -d',
|
||||
icon: <Container className="h-6 w-6" />,
|
||||
location: 'macmini'
|
||||
},
|
||||
{
|
||||
id: 7,
|
||||
title: 'Frontend testen',
|
||||
description: 'Teste die Änderungen im Browser auf dem Mac Mini.',
|
||||
command: 'http://macmini:3000',
|
||||
icon: <Eye className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
}
|
||||
]
|
||||
|
||||
const services = [
|
||||
{ name: 'Website', url: 'http://macmini:3000', port: 3000, status: 'running' },
|
||||
{ name: 'Admin v2', url: 'http://macmini:3002', port: 3002, status: 'running' },
|
||||
{ name: 'Studio v2', url: 'http://macmini:3001', port: 3001, status: 'running' },
|
||||
{ name: 'Backend', url: 'http://macmini:8000', port: 8000, status: 'running' },
|
||||
{ name: 'Gitea', url: 'http://macmini:3003', port: 3003, status: 'running' },
|
||||
{ name: 'Klausur-Service', url: 'http://macmini:8086', port: 8086, status: 'running' },
|
||||
]
|
||||
|
||||
const commitTypes = [
|
||||
{ type: 'feat:', description: 'Neue Funktion', example: 'feat: add user login' },
|
||||
{ type: 'fix:', description: 'Bugfix', example: 'fix: resolve login timeout' },
|
||||
{ type: 'docs:', description: 'Dokumentation', example: 'docs: update API docs' },
|
||||
{ type: 'style:', description: 'Formatierung', example: 'style: fix indentation' },
|
||||
{ type: 'refactor:', description: 'Code-Umbau', example: 'refactor: extract helper' },
|
||||
{ type: 'test:', description: 'Tests', example: 'test: add unit tests' },
|
||||
{ type: 'chore:', description: 'Wartung', example: 'chore: update deps' },
|
||||
]
|
||||
|
||||
return (
|
||||
<div className="space-y-8">
|
||||
{/* Header */}
|
||||
<div className="bg-gradient-to-r from-indigo-600 to-purple-600 rounded-2xl p-8 text-white">
|
||||
<h1 className="text-3xl font-bold mb-2">Entwicklungs-Workflow</h1>
|
||||
<p className="text-indigo-100">
|
||||
Wie wir bei BreakPilot entwickeln - von der Idee bis zum Deployment
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Architecture Overview */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Server className="h-5 w-5 text-indigo-600" />
|
||||
Systemarchitektur
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* MacBook */}
|
||||
<div className="bg-slate-50 rounded-xl p-5 border-2 border-slate-200">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="p-2 bg-blue-100 rounded-lg">
|
||||
<Laptop className="h-6 w-6 text-blue-600" />
|
||||
</div>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">MacBook (Entwicklung)</h3>
|
||||
<p className="text-sm text-slate-500">Dein Arbeitsplatz</p>
|
||||
</div>
|
||||
</div>
|
||||
<ul className="space-y-2 text-sm">
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Terminal + Claude Code</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Lokales Git Repository</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Browser für Frontend-Tests</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<AlertTriangle className="h-4 w-4 text-amber-500" />
|
||||
<span>Backup manuell (MacBook nachts aus)</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{/* Mac Mini */}
|
||||
<div className="bg-slate-50 rounded-xl p-5 border-2 border-indigo-200">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="p-2 bg-indigo-100 rounded-lg">
|
||||
<HardDrive className="h-6 w-6 text-indigo-600" />
|
||||
</div>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">Mac Mini (Server)</h3>
|
||||
<p className="text-sm text-slate-500">192.168.178.100</p>
|
||||
</div>
|
||||
</div>
|
||||
<ul className="space-y-2 text-sm">
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Gitea (Git Server)</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Woodpecker (CI/CD)</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Docker Container (alle Services)</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>PostgreSQL Datenbank</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Automatisches Backup (02:00 Uhr lokal)</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Workflow Steps */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-6 flex items-center gap-2">
|
||||
<Play className="h-5 w-5 text-indigo-600" />
|
||||
Entwicklungs-Schritte
|
||||
</h2>
|
||||
|
||||
<div className="space-y-4">
|
||||
{workflowSteps.map((step, index) => (
|
||||
<div
|
||||
key={step.id}
|
||||
className={`relative flex items-start gap-4 p-4 rounded-xl transition-all cursor-pointer ${
|
||||
activeStep === step.id
|
||||
? 'bg-indigo-50 border-2 border-indigo-300'
|
||||
: 'bg-slate-50 border-2 border-transparent hover:border-slate-200'
|
||||
}`}
|
||||
onClick={() => setActiveStep(step.id)}
|
||||
>
|
||||
{/* Step Number */}
|
||||
<div className={`flex-shrink-0 w-10 h-10 rounded-full flex items-center justify-center font-bold ${
|
||||
activeStep === step.id
|
||||
? 'bg-indigo-600 text-white'
|
||||
: 'bg-slate-200 text-slate-600'
|
||||
}`}>
|
||||
{step.id}
|
||||
</div>
|
||||
|
||||
{/* Content */}
|
||||
<div className="flex-grow">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<h3 className="font-semibold text-slate-900">{step.title}</h3>
|
||||
<span className={`text-xs px-2 py-0.5 rounded-full ${
|
||||
step.location === 'macbook'
|
||||
? 'bg-blue-100 text-blue-700'
|
||||
: 'bg-purple-100 text-purple-700'
|
||||
}`}>
|
||||
{step.location === 'macbook' ? 'MacBook' : 'Mac Mini'}
|
||||
</span>
|
||||
</div>
|
||||
<p className="text-sm text-slate-600 mb-2">{step.description}</p>
|
||||
{step.command && (
|
||||
<code className="text-xs bg-slate-800 text-green-400 px-3 py-1.5 rounded-lg font-mono">
|
||||
{step.command}
|
||||
</code>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Icon */}
|
||||
<div className={`flex-shrink-0 p-2 rounded-lg ${
|
||||
activeStep === step.id ? 'bg-indigo-100 text-indigo-600' : 'bg-slate-100 text-slate-400'
|
||||
}`}>
|
||||
{step.icon}
|
||||
</div>
|
||||
|
||||
{/* Connector Line */}
|
||||
{index < workflowSteps.length - 1 && (
|
||||
<div className="absolute left-9 top-14 w-0.5 h-8 bg-slate-200" />
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Services & URLs */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Eye className="h-5 w-5 text-indigo-600" />
|
||||
Services & URLs zum Testen
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
|
||||
{services.map((service) => (
|
||||
<a
|
||||
key={service.name}
|
||||
href={service.url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="flex items-center justify-between p-4 bg-slate-50 rounded-lg hover:bg-slate-100 transition-colors border border-slate-200"
|
||||
>
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">{service.name}</h3>
|
||||
<p className="text-sm text-slate-500">Port {service.port}</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="w-2 h-2 bg-green-500 rounded-full animate-pulse" />
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
</div>
|
||||
</a>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Commit Convention */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<GitBranch className="h-5 w-5 text-indigo-600" />
|
||||
Commit-Konventionen
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-3">
|
||||
{commitTypes.map((item) => (
|
||||
<div key={item.type} className="bg-slate-50 rounded-lg p-3 border border-slate-200">
|
||||
<code className="text-sm font-bold text-indigo-600">{item.type}</code>
|
||||
<p className="text-sm text-slate-600 mt-1">{item.description}</p>
|
||||
<p className="text-xs text-slate-400 mt-1 font-mono">{item.example}</p>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Backup Info */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Shield className="h-5 w-5 text-indigo-600" />
|
||||
Backup & Sicherheit
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-6">
|
||||
{/* Mac Mini - Automatisches lokales Backup */}
|
||||
<div className="bg-green-50 rounded-xl p-5 border border-green-200">
|
||||
<div className="flex items-center gap-3 mb-3">
|
||||
<Clock className="h-5 w-5 text-green-600" />
|
||||
<h3 className="font-semibold text-green-900">Mac Mini (Auto)</h3>
|
||||
</div>
|
||||
<ul className="space-y-2 text-sm text-green-800">
|
||||
<li>• Automatisch um 02:00 Uhr</li>
|
||||
<li>• PostgreSQL-Dump lokal</li>
|
||||
<li>• Git Repository gesichert</li>
|
||||
<li>• 7 Tage Aufbewahrung</li>
|
||||
</ul>
|
||||
<div className="mt-4 p-3 bg-green-100 rounded-lg">
|
||||
<code className="text-xs text-green-700 font-mono">
|
||||
~/Projekte/backup-logs/
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* MacBook - Manuelles Backup */}
|
||||
<div className="bg-amber-50 rounded-xl p-5 border border-amber-200">
|
||||
<div className="flex items-center gap-3 mb-3">
|
||||
<AlertTriangle className="h-5 w-5 text-amber-600" />
|
||||
<h3 className="font-semibold text-amber-900">MacBook (Manuell)</h3>
|
||||
</div>
|
||||
<ul className="space-y-2 text-sm text-amber-800">
|
||||
<li>• MacBook nachts aus (02:00)</li>
|
||||
<li>• Keine Auto-Synchronisation</li>
|
||||
<li>• Backup manuell anstoßen</li>
|
||||
</ul>
|
||||
<div className="mt-4 p-3 bg-amber-100 rounded-lg">
|
||||
<code className="text-xs text-amber-700 font-mono">
|
||||
rsync -avz macmini:~/Projekte/ ~/Projekte/
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Manuelles Backup starten */}
|
||||
<div className="bg-blue-50 rounded-xl p-5 border border-blue-200">
|
||||
<div className="flex items-center gap-3 mb-3">
|
||||
<Download className="h-5 w-5 text-blue-600" />
|
||||
<h3 className="font-semibold text-blue-900">Backup Script</h3>
|
||||
</div>
|
||||
<p className="text-sm text-blue-800 mb-3">
|
||||
Backup jederzeit manuell starten:
|
||||
</p>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-3 rounded-lg font-mono">
|
||||
~/Projekte/breakpilot-pwa/scripts/daily-backup.sh
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Quick Commands */}
|
||||
<div className="bg-slate-800 rounded-xl p-6 text-white">
|
||||
<h2 className="text-xl font-semibold mb-4 flex items-center gap-2">
|
||||
<Terminal className="h-5 w-5 text-green-400" />
|
||||
Wichtige Befehle
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4 font-mono text-sm">
|
||||
<div className="bg-slate-900 rounded-lg p-4">
|
||||
<p className="text-slate-400 mb-2"># CI/CD Logs ansehen</p>
|
||||
<code className="text-green-400">ssh macmini "docker logs breakpilot-pwa-backend --tail 50"</code>
|
||||
</div>
|
||||
<div className="bg-slate-900 rounded-lg p-4">
|
||||
<p className="text-slate-400 mb-2"># Container neu starten</p>
|
||||
<code className="text-green-400">ssh macmini "docker compose restart backend"</code>
|
||||
</div>
|
||||
<div className="bg-slate-900 rounded-lg p-4">
|
||||
<p className="text-slate-400 mb-2"># Alle Container Status</p>
|
||||
<code className="text-green-400">ssh macmini "docker ps"</code>
|
||||
</div>
|
||||
<div className="bg-slate-900 rounded-lg p-4">
|
||||
<p className="text-slate-400 mb-2"># Pipeline Status (Gitea)</p>
|
||||
<code className="text-green-400">open http://macmini:3003</code>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Team Workflow with Feature Branches */}
|
||||
<div className="bg-indigo-50 rounded-xl border border-indigo-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-indigo-900 mb-4 flex items-center gap-2">
|
||||
<GitBranch className="h-5 w-5 text-indigo-600" />
|
||||
Team-Workflow (3+ Entwickler)
|
||||
</h2>
|
||||
|
||||
<div className="bg-white rounded-xl p-5 mb-4">
|
||||
<h3 className="font-semibold text-slate-900 mb-3">Feature Branch Workflow</h3>
|
||||
<div className="flex flex-wrap items-center gap-2 text-sm">
|
||||
<code className="bg-slate-100 px-2 py-1 rounded">main</code>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<code className="bg-blue-100 text-blue-700 px-2 py-1 rounded">feature/neue-funktion</code>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<span className="text-slate-600">Entwicklung</span>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<span className="bg-purple-100 text-purple-700 px-2 py-1 rounded">Pull Request</span>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<span className="bg-green-100 text-green-700 px-2 py-1 rounded">Code Review</span>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<code className="bg-slate-100 px-2 py-1 rounded">main</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
<div className="bg-white rounded-lg p-4 border border-indigo-100">
|
||||
<h4 className="font-medium text-slate-900 mb-2">1. Feature Branch erstellen</h4>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
|
||||
git checkout -b feature/mein-feature
|
||||
</code>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg p-4 border border-indigo-100">
|
||||
<h4 className="font-medium text-slate-900 mb-2">2. Änderungen committen</h4>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
|
||||
git commit -m "feat: beschreibung"
|
||||
</code>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg p-4 border border-indigo-100">
|
||||
<h4 className="font-medium text-slate-900 mb-2">3. Branch pushen</h4>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
|
||||
git push -u origin feature/mein-feature
|
||||
</code>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg p-4 border border-indigo-100">
|
||||
<h4 className="font-medium text-slate-900 mb-2">4. Pull Request in Gitea</h4>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
|
||||
http://macmini:3003 → Pull Request
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="mt-4 p-4 bg-indigo-100 rounded-lg">
|
||||
<h4 className="font-medium text-indigo-900 mb-2">Branch-Namenskonvention</h4>
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-2 text-sm">
|
||||
<div><code className="text-indigo-700">feature/</code> Neue Funktion</div>
|
||||
<div><code className="text-indigo-700">fix/</code> Bugfix</div>
|
||||
<div><code className="text-indigo-700">hotfix/</code> Dringender Fix</div>
|
||||
<div><code className="text-indigo-700">refactor/</code> Code-Umbau</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Team Rules */}
|
||||
<div className="bg-amber-50 rounded-xl border border-amber-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-amber-900 mb-4 flex items-center gap-2">
|
||||
<Users className="h-5 w-5 text-amber-600" />
|
||||
Team-Regeln
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Feature Branches nutzen</h3>
|
||||
<p className="text-sm text-slate-600">Nie direkt auf main pushen - immer über Pull Request</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Code Review erforderlich</h3>
|
||||
<p className="text-sm text-slate-600">Mindestens 1 Approval vor dem Merge</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Tests müssen grün sein</h3>
|
||||
<p className="text-sm text-slate-600">CI/CD Pipeline muss erfolgreich durchlaufen</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Aussagekräftige Commits</h3>
|
||||
<p className="text-sm text-slate-600">Nutze Conventional Commits (feat:, fix:, etc.)</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Branch aktuell halten</h3>
|
||||
<p className="text-sm text-slate-600">Regelmäßig main in deinen Branch mergen</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<AlertTriangle className="h-5 w-5 text-amber-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Nie Force-Push auf main</h3>
|
||||
<p className="text-sm text-slate-600">Geschichte von main nie überschreiben</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* CI/CD Infrastruktur - Automatisierte OAuth Integration */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Shield className="h-5 w-5 text-indigo-600" />
|
||||
CI/CD Infrastruktur (Automatisiert)
|
||||
</h2>
|
||||
|
||||
<div className="bg-blue-50 rounded-xl p-4 mb-6 border border-blue-200">
|
||||
<div className="flex items-start gap-3">
|
||||
<Info className="h-5 w-5 text-blue-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h4 className="font-medium text-blue-900">Warum automatisiert?</h4>
|
||||
<p className="text-sm text-blue-800 mt-1">
|
||||
Die OAuth-Integration zwischen Woodpecker und Gitea ist vollautomatisiert.
|
||||
Dies ist eine DevSecOps Best Practice: Credentials werden in HashiCorp Vault gespeichert
|
||||
und können bei Bedarf automatisch regeneriert werden.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* Architektur */}
|
||||
<div className="bg-slate-50 rounded-xl p-5 border border-slate-200">
|
||||
<h3 className="font-semibold text-slate-900 mb-3">Architektur</h3>
|
||||
<div className="space-y-3 text-sm">
|
||||
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
|
||||
<div className="w-3 h-3 bg-green-500 rounded-full" />
|
||||
<span className="font-medium">Gitea</span>
|
||||
<span className="text-slate-500">Port 3003</span>
|
||||
<span className="text-xs text-slate-400 ml-auto">Git Server</span>
|
||||
</div>
|
||||
<div className="flex items-center justify-center">
|
||||
<ArrowRight className="h-4 w-4 text-slate-400 rotate-90" />
|
||||
<span className="text-xs text-slate-500 ml-2">OAuth 2.0</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
|
||||
<div className="w-3 h-3 bg-blue-500 rounded-full" />
|
||||
<span className="font-medium">Woodpecker</span>
|
||||
<span className="text-slate-500">Port 8090</span>
|
||||
<span className="text-xs text-slate-400 ml-auto">CI/CD Server</span>
|
||||
</div>
|
||||
<div className="flex items-center justify-center">
|
||||
<ArrowRight className="h-4 w-4 text-slate-400 rotate-90" />
|
||||
<span className="text-xs text-slate-500 ml-2">Credentials</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
|
||||
<div className="w-3 h-3 bg-purple-500 rounded-full" />
|
||||
<span className="font-medium">Vault</span>
|
||||
<span className="text-slate-500">Port 8200</span>
|
||||
<span className="text-xs text-slate-400 ml-auto">Secrets Manager</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Credentials Speicherort */}
|
||||
<div className="bg-slate-50 rounded-xl p-5 border border-slate-200">
|
||||
<h3 className="font-semibold text-slate-900 mb-3">Credentials Speicherorte</h3>
|
||||
<div className="space-y-3 text-sm">
|
||||
<div className="p-3 bg-white rounded-lg border">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<Database className="h-4 w-4 text-purple-500" />
|
||||
<span className="font-medium">HashiCorp Vault</span>
|
||||
</div>
|
||||
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
|
||||
secret/cicd/woodpecker
|
||||
</code>
|
||||
<p className="text-xs text-slate-500 mt-1">Client ID + Secret (Quelle der Wahrheit)</p>
|
||||
</div>
|
||||
<div className="p-3 bg-white rounded-lg border">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<FileCode className="h-4 w-4 text-blue-500" />
|
||||
<span className="font-medium">.env Datei</span>
|
||||
</div>
|
||||
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
|
||||
WOODPECKER_GITEA_CLIENT/SECRET
|
||||
</code>
|
||||
<p className="text-xs text-slate-500 mt-1">Für Docker Compose (aus Vault geladen)</p>
|
||||
</div>
|
||||
<div className="p-3 bg-white rounded-lg border">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<Database className="h-4 w-4 text-green-500" />
|
||||
<span className="font-medium">Gitea PostgreSQL</span>
|
||||
</div>
|
||||
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
|
||||
oauth2_application
|
||||
</code>
|
||||
<p className="text-xs text-slate-500 mt-1">OAuth App Registration (gehashtes Secret)</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Troubleshooting */}
|
||||
<div className="mt-6 bg-amber-50 rounded-xl p-5 border border-amber-200">
|
||||
<h3 className="font-semibold text-amber-900 mb-3 flex items-center gap-2">
|
||||
<AlertTriangle className="h-5 w-5 text-amber-600" />
|
||||
Troubleshooting: OAuth Fehler beheben
|
||||
</h3>
|
||||
<p className="text-sm text-amber-800 mb-3">
|
||||
Falls der Fehler "Client ID not registered" oder "user does not exist" auftritt:
|
||||
</p>
|
||||
<div className="bg-slate-800 rounded-lg p-4 font-mono text-sm">
|
||||
<p className="text-slate-400"># Credentials automatisch regenerieren</p>
|
||||
<p className="text-green-400">./scripts/sync-woodpecker-credentials.sh --regenerate</p>
|
||||
<p className="text-slate-400 mt-2"># Oder manuell: Vault → Gitea → .env → Restart</p>
|
||||
<p className="text-green-400">rsync .env macmini:~/Projekte/breakpilot-pwa/</p>
|
||||
<p className="text-green-400">ssh macmini "cd ~/Projekte/breakpilot-pwa && docker compose up -d --force-recreate woodpecker-server"</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Team Members Info */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Users className="h-5 w-5 text-indigo-600" />
|
||||
Team-Kommunikation
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
|
||||
<div className="bg-slate-50 rounded-lg p-4 text-center">
|
||||
<div className="text-3xl mb-2">💬</div>
|
||||
<h3 className="font-medium text-slate-900">Pull Request Kommentare</h3>
|
||||
<p className="text-sm text-slate-600 mt-1">Code-Diskussionen im PR</p>
|
||||
</div>
|
||||
<div className="bg-slate-50 rounded-lg p-4 text-center">
|
||||
<div className="text-3xl mb-2">📋</div>
|
||||
<h3 className="font-medium text-slate-900">Issues in Gitea</h3>
|
||||
<p className="text-sm text-slate-600 mt-1">Bugs & Features tracken</p>
|
||||
</div>
|
||||
<div className="bg-slate-50 rounded-lg p-4 text-center">
|
||||
<div className="text-3xl mb-2">🔔</div>
|
||||
<h3 className="font-medium text-slate-900">CI/CD Notifications</h3>
|
||||
<p className="text-sm text-slate-600 mt-1">Pipeline-Status per Mail</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -177,6 +177,7 @@ export default function GPUInfrastructurePage() {
|
||||
databases: ['PostgreSQL (Logs)'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
|
||||
{ name: 'Security', href: '/infrastructure/security', description: 'DevSecOps Dashboard' },
|
||||
{ name: 'Builds', href: '/infrastructure/builds', description: 'CI/CD Pipeline' },
|
||||
]}
|
||||
|
||||
@@ -51,9 +51,13 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
|
||||
// ===== DATABASES =====
|
||||
{ type: 'service', name: 'PostgreSQL', version: '16-alpine', category: 'database', port: '5432', description: 'Hauptdatenbank', license: 'PostgreSQL', sourceUrl: 'https://github.com/postgres/postgres' },
|
||||
{ type: 'service', name: 'Synapse PostgreSQL', version: '16-alpine', category: 'database', port: '-', description: 'Matrix Datenbank', license: 'PostgreSQL', sourceUrl: 'https://github.com/postgres/postgres' },
|
||||
{ type: 'service', name: 'ERPNext MariaDB', version: '10.6', category: 'database', port: '-', description: 'ERPNext Datenbank', license: 'GPL-2.0', sourceUrl: 'https://github.com/MariaDB/server' },
|
||||
{ type: 'service', name: 'MongoDB', version: '7.0', category: 'database', port: '27017', description: 'LibreChat Datenbank', license: 'SSPL-1.0', sourceUrl: 'https://github.com/mongodb/mongo' },
|
||||
|
||||
// ===== CACHE & QUEUE =====
|
||||
{ type: 'service', name: 'Valkey', version: '8-alpine', category: 'cache', port: '6379', description: 'In-Memory Cache & Sessions (Redis OSS Fork)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
|
||||
{ type: 'service', name: 'ERPNext Valkey Queue', version: 'alpine', category: 'cache', port: '-', description: 'Job Queue', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
|
||||
{ type: 'service', name: 'ERPNext Valkey Cache', version: 'alpine', category: 'cache', port: '-', description: 'Cache Layer', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
|
||||
|
||||
// ===== SEARCH ENGINES =====
|
||||
{ type: 'service', name: 'Qdrant', version: '1.7.4', category: 'search', port: '6333', description: 'Vector Database (RAG/Embeddings)', license: 'Apache-2.0', sourceUrl: 'https://github.com/qdrant/qdrant' },
|
||||
@@ -62,6 +66,8 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
|
||||
|
||||
// ===== OBJECT STORAGE =====
|
||||
{ type: 'service', name: 'MinIO', version: 'latest', category: 'storage', port: '9000/9001', description: 'S3-kompatibel Object Storage', license: 'AGPL-3.0', sourceUrl: 'https://github.com/minio/minio' },
|
||||
{ type: 'service', name: 'IPFS (Kubo)', version: '0.24', category: 'storage', port: '5001', description: 'Dezentrales Speichersystem', license: 'MIT/Apache-2.0', sourceUrl: 'https://github.com/ipfs/kubo' },
|
||||
{ type: 'service', name: 'DSMS Gateway', version: '1.0', category: 'storage', port: '8082', description: 'IPFS REST API', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== SECURITY =====
|
||||
{ type: 'service', name: 'HashiCorp Vault', version: '1.15', category: 'security', port: '8200', description: 'Secrets Management', license: 'BUSL-1.1', sourceUrl: 'https://github.com/hashicorp/vault' },
|
||||
@@ -77,19 +83,36 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
|
||||
{ type: 'service', name: 'Jibri', version: 'stable-9823', category: 'communication', port: '-', description: 'Recording & Streaming Service', license: 'Apache-2.0', sourceUrl: 'https://github.com/jitsi/jibri' },
|
||||
|
||||
// ===== APPLICATION SERVICES (Python) =====
|
||||
{ type: 'service', name: 'Python Backend (FastAPI)', version: '3.12', category: 'application', port: '8000', description: 'Lehrer Backend API (Klausuren, E-Mail, Alerts)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Python Backend (FastAPI)', version: '3.12', category: 'application', port: '8000', description: 'Haupt-Backend API, Studio & Alerts Agent', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Klausur Service', version: '1.0', category: 'application', port: '8086', description: 'Abitur-Klausurkorrektur (BYOEH)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Compliance Module', version: '2.0', category: 'application', port: '8000', description: 'GRC Framework (19 Regulations, 558 Requirements, AI)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Transcription Worker', version: '1.0', category: 'application', port: '-', description: 'Whisper + pyannote Transkription', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== APPLICATION SERVICES (Go) =====
|
||||
{ type: 'service', name: 'Go Consent Service', version: '1.21', category: 'application', port: '8081', description: 'DSGVO Consent Management', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Go School Service', version: '1.21', category: 'application', port: '8084', description: 'Klausuren, Noten, Zeugnisse', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Go Billing Service', version: '1.21', category: 'application', port: '8083', description: 'Stripe Billing Integration', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== APPLICATION SERVICES (Node.js) =====
|
||||
{ type: 'service', name: 'Next.js Admin Frontend', version: '15.1', category: 'application', port: '3002', description: 'Admin Lehrer Dashboard (React)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Next.js Admin Frontend', version: '15.1', category: 'application', port: '3000', description: 'Admin Dashboard (React)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'H5P Content Service', version: 'latest', category: 'application', port: '8085', description: 'Interaktive Inhalte', license: 'MIT', sourceUrl: 'https://github.com/h5p/h5p-server' },
|
||||
{ type: 'service', name: 'Policy Vault (NestJS)', version: '1.0', category: 'application', port: '3001', description: 'Richtlinien-Verwaltung API', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Policy Vault (Angular)', version: '17', category: 'application', port: '4200', description: 'Richtlinien-Verwaltung UI', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== APPLICATION SERVICES (Vue) =====
|
||||
{ type: 'service', name: 'Creator Studio (Vue 3)', version: '3.4', category: 'application', port: '-', description: 'Content Creation UI', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== AI/LLM SERVICES =====
|
||||
{ type: 'service', name: 'LibreChat', version: 'latest', category: 'ai', port: '3080', description: 'Multi-LLM Chat Interface', license: 'MIT', sourceUrl: 'https://github.com/danny-avila/LibreChat' },
|
||||
{ type: 'service', name: 'RAGFlow', version: 'latest', category: 'ai', port: '9380', description: 'RAG Pipeline Service', license: 'Apache-2.0', sourceUrl: 'https://github.com/infiniflow/ragflow' },
|
||||
|
||||
// ===== ERP =====
|
||||
{ type: 'service', name: 'ERPNext', version: 'v15', category: 'erp', port: '8090', description: 'Open Source ERP System', license: 'GPL-3.0', sourceUrl: 'https://github.com/frappe/erpnext' },
|
||||
|
||||
// ===== CI/CD & VERSION CONTROL =====
|
||||
{ type: 'service', name: 'Woodpecker CI', version: '2.x', category: 'cicd', port: '8082', description: 'Self-hosted CI/CD Pipeline (Drone Fork)', license: 'Apache-2.0', sourceUrl: 'https://github.com/woodpecker-ci/woodpecker' },
|
||||
{ type: 'service', name: 'Gitea', version: '1.21', category: 'cicd', port: '3003', description: 'Self-hosted Git Service', license: 'MIT', sourceUrl: 'https://github.com/go-gitea/gitea' },
|
||||
{ type: 'service', name: 'Dokploy', version: '0.26.7', category: 'cicd', port: '3000', description: 'Self-hosted PaaS (Vercel/Heroku Alternative)', license: 'Apache-2.0', sourceUrl: 'https://github.com/Dokploy/dokploy' },
|
||||
|
||||
// ===== DEVELOPMENT =====
|
||||
{ type: 'service', name: 'Mailpit', version: 'latest', category: 'development', port: '8025/1025', description: 'E-Mail Testing (SMTP Catch-All)', license: 'MIT', sourceUrl: 'https://github.com/axllent/mailpit' },
|
||||
@@ -161,7 +184,10 @@ const PYTHON_PACKAGES: Component[] = [
|
||||
{ type: 'library', name: 'structlog', version: '24.x', category: 'python', description: 'Structured Logging', license: 'Apache-2.0', sourceUrl: 'https://github.com/hynek/structlog' },
|
||||
{ type: 'library', name: 'feedparser', version: '6.x', category: 'python', description: 'RSS/Atom Feed Parser (Alerts Agent)', license: 'BSD-2-Clause', sourceUrl: 'https://github.com/kurtmckee/feedparser' },
|
||||
{ type: 'library', name: 'APScheduler', version: '3.x', category: 'python', description: 'AsyncIO Job Scheduler (Alerts Agent)', license: 'MIT', sourceUrl: 'https://github.com/agronholm/apscheduler' },
|
||||
{ type: 'library', name: 'beautifulsoup4', version: '4.x', category: 'python', description: 'HTML Parser (Email Parsing)', license: 'MIT', sourceUrl: 'https://code.launchpad.net/beautifulsoup' },
|
||||
{ type: 'library', name: 'beautifulsoup4', version: '4.x', category: 'python', description: 'HTML Parser (Email Parsing, Compliance Scraper)', license: 'MIT', sourceUrl: 'https://code.launchpad.net/beautifulsoup' },
|
||||
{ type: 'library', name: 'lxml', version: '5.x', category: 'python', description: 'XML/HTML Parser (EUR-Lex Scraping)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/lxml/lxml' },
|
||||
{ type: 'library', name: 'PyMuPDF', version: '1.24+', category: 'python', description: 'PDF Parser (BSI-TR Extraction)', license: 'AGPL-3.0', sourceUrl: 'https://github.com/pymupdf/PyMuPDF' },
|
||||
{ type: 'library', name: 'pdfplumber', version: '0.11+', category: 'python', description: 'PDF Table Extraction (Compliance Docs)', license: 'MIT', sourceUrl: 'https://github.com/jsvine/pdfplumber' },
|
||||
{ type: 'library', name: 'websockets', version: '14.x', category: 'python', description: 'WebSocket Support (Voice Streaming)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/python-websockets/websockets' },
|
||||
{ type: 'library', name: 'soundfile', version: '0.13+', category: 'python', description: 'Audio File Processing (Voice Service)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/bastibe/python-soundfile' },
|
||||
{ type: 'library', name: 'scipy', version: '1.14+', category: 'python', description: 'Signal Processing (Audio)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/scipy/scipy' },
|
||||
@@ -174,8 +200,7 @@ const GO_MODULES: Component[] = [
|
||||
{ type: 'library', name: 'gin-gonic/gin', version: '1.9+', category: 'go', description: 'Web Framework', license: 'MIT', sourceUrl: 'https://github.com/gin-gonic/gin' },
|
||||
{ type: 'library', name: 'gorm.io/gorm', version: '1.25+', category: 'go', description: 'ORM', license: 'MIT', sourceUrl: 'https://github.com/go-gorm/gorm' },
|
||||
{ type: 'library', name: 'golang-jwt/jwt', version: 'v5', category: 'go', description: 'JWT Library', license: 'MIT', sourceUrl: 'https://github.com/golang-jwt/jwt' },
|
||||
{ type: 'library', name: 'opensearch-project/opensearch-go', version: '4.x', category: 'go', description: 'OpenSearch Client (edu-search-service)', license: 'Apache-2.0', sourceUrl: 'https://github.com/opensearch-project/opensearch-go' },
|
||||
{ type: 'library', name: 'lib/pq', version: '1.10+', category: 'go', description: 'PostgreSQL Driver (school-service)', license: 'MIT', sourceUrl: 'https://github.com/lib/pq' },
|
||||
{ type: 'library', name: 'stripe/stripe-go', version: 'v76', category: 'go', description: 'Stripe SDK', license: 'MIT', sourceUrl: 'https://github.com/stripe/stripe-go' },
|
||||
{ type: 'library', name: 'spf13/viper', version: 'latest', category: 'go', description: 'Configuration', license: 'MIT', sourceUrl: 'https://github.com/spf13/viper' },
|
||||
{ type: 'library', name: 'uber-go/zap', version: 'latest', category: 'go', description: 'Structured Logging', license: 'MIT', sourceUrl: 'https://github.com/uber-go/zap' },
|
||||
{ type: 'library', name: 'swaggo/swag', version: 'latest', category: 'go', description: 'Swagger Docs', license: 'MIT', sourceUrl: 'https://github.com/swaggo/swag' },
|
||||
@@ -185,10 +210,15 @@ const GO_MODULES: Component[] = [
|
||||
const NODE_PACKAGES: Component[] = [
|
||||
{ type: 'library', name: 'Next.js', version: '15.1', category: 'nodejs', description: 'React Framework', license: 'MIT', sourceUrl: 'https://github.com/vercel/next.js' },
|
||||
{ type: 'library', name: 'React', version: '19', category: 'nodejs', description: 'UI Library', license: 'MIT', sourceUrl: 'https://github.com/facebook/react' },
|
||||
{ type: 'library', name: 'Vue.js', version: '3.4', category: 'nodejs', description: 'UI Framework (Creator Studio)', license: 'MIT', sourceUrl: 'https://github.com/vuejs/core' },
|
||||
{ type: 'library', name: 'Angular', version: '17', category: 'nodejs', description: 'UI Framework (Policy Vault)', license: 'MIT', sourceUrl: 'https://github.com/angular/angular' },
|
||||
{ type: 'library', name: 'NestJS', version: '10', category: 'nodejs', description: 'Node.js Framework', license: 'MIT', sourceUrl: 'https://github.com/nestjs/nest' },
|
||||
{ type: 'library', name: 'TypeScript', version: '5.x', category: 'nodejs', description: 'Type System', license: 'Apache-2.0', sourceUrl: 'https://github.com/microsoft/TypeScript' },
|
||||
{ type: 'library', name: 'Tailwind CSS', version: '3.4', category: 'nodejs', description: 'Utility CSS', license: 'MIT', sourceUrl: 'https://github.com/tailwindlabs/tailwindcss' },
|
||||
{ type: 'library', name: 'Prisma', version: '5.x', category: 'nodejs', description: 'ORM (Policy Vault)', license: 'Apache-2.0', sourceUrl: 'https://github.com/prisma/prisma' },
|
||||
{ type: 'library', name: 'Material Design Icons', version: 'latest', category: 'nodejs', description: 'Icon-System (Companion UI, Studio)', license: 'Apache-2.0', sourceUrl: 'https://github.com/google/material-design-icons' },
|
||||
{ type: 'library', name: 'Recharts', version: '2.12', category: 'nodejs', description: 'React Charts (Admin Dashboard)', license: 'MIT', sourceUrl: 'https://github.com/recharts/recharts' },
|
||||
{ type: 'library', name: 'Recharts', version: '2.12', category: 'nodejs', description: 'React Charts (Compliance Dashboard)', license: 'MIT', sourceUrl: 'https://github.com/recharts/recharts' },
|
||||
{ type: 'library', name: 'React Flow', version: '11.x', category: 'nodejs', description: 'Node-basierte Flow-Diagramme (Screen Flow)', license: 'MIT', sourceUrl: 'https://github.com/xyflow/xyflow' },
|
||||
{ type: 'library', name: 'Playwright', version: '1.50', category: 'nodejs', description: 'E2E Testing Framework (SDK Tests)', license: 'Apache-2.0', sourceUrl: 'https://github.com/microsoft/playwright' },
|
||||
{ type: 'library', name: 'Vitest', version: '4.x', category: 'nodejs', description: 'Unit Testing Framework', license: 'MIT', sourceUrl: 'https://github.com/vitest-dev/vitest' },
|
||||
{ type: 'library', name: 'jsPDF', version: '4.x', category: 'nodejs', description: 'PDF Generation (SDK Export)', license: 'MIT', sourceUrl: 'https://github.com/parallax/jsPDF' },
|
||||
@@ -327,7 +357,9 @@ export default function SBOMPage() {
|
||||
case 'communication': return 'bg-yellow-100 text-yellow-800'
|
||||
case 'storage': return 'bg-orange-100 text-orange-800'
|
||||
case 'search': return 'bg-pink-100 text-pink-800'
|
||||
case 'erp': return 'bg-indigo-100 text-indigo-800'
|
||||
case 'cache': return 'bg-cyan-100 text-cyan-800'
|
||||
case 'ai': return 'bg-violet-100 text-violet-800'
|
||||
case 'development': return 'bg-gray-100 text-gray-800'
|
||||
case 'cicd': return 'bg-orange-100 text-orange-800'
|
||||
case 'python': return 'bg-emerald-100 text-emerald-800'
|
||||
@@ -383,7 +415,7 @@ export default function SBOMPage() {
|
||||
<div>
|
||||
<PagePurpose
|
||||
title="SBOM"
|
||||
purpose="Software Bill of Materials - Alle Komponenten & Abhaengigkeiten der Breakpilot Lehrer-Plattform. Wichtig fuer Supply-Chain-Security, Compliance-Audits und Lizenz-Pruefung."
|
||||
purpose="Software Bill of Materials - Alle Komponenten & Abhaengigkeiten der Breakpilot-Plattform. Wichtig fuer Supply-Chain-Security, Compliance-Audits und Lizenz-Pruefung."
|
||||
audience={['DevOps', 'Compliance', 'Security', 'Auditoren']}
|
||||
gdprArticles={['Art. 32 (Sicherheit der Verarbeitung)']}
|
||||
architecture={{
|
||||
@@ -622,7 +654,7 @@ export default function SBOMPage() {
|
||||
const url = URL.createObjectURL(blob)
|
||||
const a = document.createElement('a')
|
||||
a.href = url
|
||||
a.download = `breakpilot-lehrer-sbom-${new Date().toISOString().split('T')[0]}.json`
|
||||
a.download = `breakpilot-sbom-${new Date().toISOString().split('T')[0]}.json`
|
||||
a.click()
|
||||
}}
|
||||
className="px-4 py-2 bg-orange-600 text-white rounded-lg hover:bg-orange-700 transition-colors flex items-center gap-2"
|
||||
|
||||
@@ -335,6 +335,7 @@ export default function RBACPage() {
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'Audit Trail', href: '/sdk/audit-report', description: 'LLM-Operationen protokollieren' },
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
|
||||
]}
|
||||
/>
|
||||
|
||||
|
||||
@@ -1,163 +0,0 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
|
||||
/**
|
||||
* Tests for Chunk-Browser logic:
|
||||
* - Collection dropdown has all 10 collections
|
||||
* - COLLECTION_TOTALS has expected keys
|
||||
* - Text search highlighting logic
|
||||
* - Pagination state management
|
||||
*/
|
||||
|
||||
// Replicate the COMPLIANCE_COLLECTIONS from the dropdown
|
||||
/**
 * Local replica of the collection names offered by the Chunk-Browser dropdown.
 * Order matters: the tests below treat the first entry as the default selection.
 */
const COMPLIANCE_COLLECTIONS = [
  'bp_compliance_gesetze',
  'bp_compliance_ce',
  'bp_compliance_datenschutz',
  'bp_dsfa_corpus',
  'bp_compliance_recht',
  'bp_legal_templates',
  'bp_compliance_gdpr',
  'bp_compliance_schulrecht',
  'bp_dsfa_templates',
  'bp_dsfa_risks',
] as const
|
||||
|
||||
// Replicate COLLECTION_TOTALS from page.tsx
|
||||
/**
 * Local replica of the per-collection chunk counts kept in page.tsx.
 * With the numbers below, `total_legal` equals gesetze + ce, and `total_all`
 * equals the sum of all seven per-collection entries.
 */
const COLLECTION_TOTALS: Record<string, number> = {
  // Per-collection counts
  bp_compliance_gesetze: 58304,
  bp_compliance_ce: 18183,
  bp_legal_templates: 7689,
  bp_compliance_datenschutz: 2448,
  bp_dsfa_corpus: 7867,
  bp_compliance_recht: 1425,
  bp_nibis_eh: 7996,
  // Aggregates
  total_legal: 76487,
  total_all: 103912,
}
|
||||
|
||||
// Exercises the local replicas of the Chunk-Browser constants plus the plain
// filtering and pagination bookkeeping written out in this file; no component
// is rendered and no network call is made.
describe('Chunk-Browser Logic', () => {
  describe('COMPLIANCE_COLLECTIONS', () => {
    it('should have exactly 10 collections', () => {
      expect(COMPLIANCE_COLLECTIONS.length).toBe(10)
    })

    it('should include bp_compliance_ce for IFRS documents', () => {
      expect(COMPLIANCE_COLLECTIONS).toContain('bp_compliance_ce')
    })

    it('should include bp_compliance_datenschutz for EFRAG/ENISA', () => {
      expect(COMPLIANCE_COLLECTIONS).toContain('bp_compliance_datenschutz')
    })

    it('should include bp_compliance_gesetze as default', () => {
      const [firstCollection] = COMPLIANCE_COLLECTIONS
      expect(firstCollection).toBe('bp_compliance_gesetze')
    })

    it('should have all collection names starting with bp_', () => {
      for (const name of COMPLIANCE_COLLECTIONS) {
        expect(name).toMatch(/^bp_/)
      }
    })
  })

  describe('COLLECTION_TOTALS', () => {
    it('should have bp_compliance_ce key', () => {
      expect(COLLECTION_TOTALS).toHaveProperty('bp_compliance_ce')
    })

    it('should have bp_compliance_datenschutz key', () => {
      expect(COLLECTION_TOTALS).toHaveProperty('bp_compliance_datenschutz')
    })

    it('should have positive counts for all collections', () => {
      for (const count of Object.values(COLLECTION_TOTALS)) {
        expect(count).toBeGreaterThan(0)
      }
    })

    it('total_all should be greater than total_legal', () => {
      const { total_all: everything, total_legal: legalOnly } = COLLECTION_TOTALS
      expect(everything).toBeGreaterThan(legalOnly)
    })
  })

  describe('Text search filtering logic', () => {
    const mockChunks = [
      { id: '1', text: 'DSGVO Artikel 1 Datenschutz', regulation_code: 'GDPR' },
      { id: '2', text: 'IFRS 16 Leasing Standard', regulation_code: 'EU_IFRS' },
      { id: '3', text: 'Datenschutz Grundverordnung', regulation_code: 'GDPR' },
      { id: '4', text: 'ENISA Supply Chain Security', regulation_code: 'ENISA' },
    ]

    // Case-insensitive substring match over the chunk text.
    const chunksMatching = (term: string) => {
      const needle = term.toLowerCase()
      return mockChunks.filter(({ text }) => text.toLowerCase().includes(needle))
    }

    it('should filter chunks by text search (case insensitive)', () => {
      expect(chunksMatching('datenschutz')).toHaveLength(2)
    })

    it('should return all chunks when search is empty', () => {
      const search: string = ''
      // An empty search term bypasses filtering entirely.
      const visible = search === '' ? mockChunks : chunksMatching(search)
      expect(visible).toHaveLength(4)
    })

    it('should return 0 chunks when no match', () => {
      expect(chunksMatching('blockchain')).toHaveLength(0)
    })

    it('should match IFRS chunks', () => {
      const hits = chunksMatching('IFRS')
      expect(hits).toHaveLength(1)
      expect(hits[0].regulation_code).toBe('EU_IFRS')
    })
  })

  describe('Pagination state', () => {
    it('should start at page 0', () => {
      const initialPage = 0
      expect(initialPage).toBe(0)
    })

    it('should increment page on next', () => {
      let page = 0
      page++
      expect(page).toBe(1)
    })

    it('should maintain offset history for back navigation', () => {
      const offsets: (string | null)[] = [
        null, // page 0 offset
        'uuid-20', // page 1 offset
        'uuid-40', // page 2 offset
      ]

      // Going back one page means reading the second-to-last recorded offset.
      const previous = offsets[offsets.length - 2]
      expect(previous).toBe('uuid-20')
    })

    it('should reset state on collection change', () => {
      const state: { offset: string | null; history: (string | null)[]; page: number } = {
        offset: 'some-offset',
        history: [null, 'uuid-1'],
        page: 3,
      }

      // Simulate collection change
      state.offset = null
      state.history = []
      state.page = 0

      expect(state.offset).toBeNull()
      expect(state.history).toHaveLength(0)
      expect(state.page).toBe(0)
    })
  })
})
|
||||
@@ -1,90 +0,0 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
|
||||
/**
|
||||
* Tests for RAG page constants - REGULATIONS_IN_RAG, REGULATION_SOURCES, REGULATION_LICENSES
|
||||
*
|
||||
* These are defined inline in page.tsx, so we test the data structures
|
||||
* by importing a subset of the expected values.
|
||||
*/
|
||||
|
||||
// Expected IFRS entries in REGULATIONS_IN_RAG
|
||||
/**
 * Subset of the IFRS-related entries expected in REGULATIONS_IN_RAG, keyed by
 * regulation code. Only `collection` is asserted by the tests in this file;
 * every `chunks` value here is 0.
 */
const EXPECTED_IFRS_ENTRIES = {
  EU_IFRS_DE: {
    collection: 'bp_compliance_ce',
    chunks: 0,
  },
  EU_IFRS_EN: {
    collection: 'bp_compliance_ce',
    chunks: 0,
  },
  EFRAG_ENDORSEMENT: {
    collection: 'bp_compliance_datenschutz',
    chunks: 0,
  },
}
|
||||
|
||||
// Expected REGULATION_SOURCES URLs
|
||||
/**
 * Source URLs expected in REGULATION_SOURCES, keyed by regulation code.
 * The tests only inspect these strings (host, language segment, CELEX number);
 * none of the URLs is ever fetched.
 */
const EXPECTED_SOURCES = {
  // EUR-Lex
  GDPR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32016R0679',
  EU_IFRS_DE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
  EU_IFRS_EN: 'https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32023R1803',
  // Other publishers
  EFRAG_ENDORSEMENT: 'https://www.efrag.org/activities/endorsement-status-report',
  ENISA_SECURE_DEV: 'https://www.enisa.europa.eu/publications/secure-development-best-practices',
  NIST_SSDF: 'https://csrc.nist.gov/pubs/sp/800/218/final',
  NIST_CSF: 'https://www.nist.gov/cyberframework',
  OECD_AI: 'https://oecd.ai/en/ai-principles',
}
|
||||
|
||||
// Sanity checks on the expected-value tables declared in this file: which
// collection each IFRS entry points at, and the shape of each source URL.
describe('RAG Page Constants', () => {
  describe('IFRS entries in REGULATIONS_IN_RAG', () => {
    it('should have EU_IFRS_DE entry with bp_compliance_ce collection', () => {
      const { collection } = EXPECTED_IFRS_ENTRIES.EU_IFRS_DE
      expect(collection).toBe('bp_compliance_ce')
    })

    it('should have EU_IFRS_EN entry with bp_compliance_ce collection', () => {
      const { collection } = EXPECTED_IFRS_ENTRIES.EU_IFRS_EN
      expect(collection).toBe('bp_compliance_ce')
    })

    it('should have EFRAG_ENDORSEMENT entry with bp_compliance_datenschutz collection', () => {
      const { collection } = EXPECTED_IFRS_ENTRIES.EFRAG_ENDORSEMENT
      expect(collection).toBe('bp_compliance_datenschutz')
    })
  })

  describe('REGULATION_SOURCES URLs', () => {
    it('should have valid EUR-Lex URLs for EU regulations', () => {
      const eurLexKeys = ['GDPR', 'EU_IFRS_DE', 'EU_IFRS_EN'] as const
      for (const key of eurLexKeys) {
        expect(EXPECTED_SOURCES[key]).toMatch(/^https:\/\/eur-lex\.europa\.eu/)
      }
    })

    it('should have correct CELEX for IFRS DE (32023R1803)', () => {
      const germanUrl = EXPECTED_SOURCES.EU_IFRS_DE
      expect(germanUrl).toContain('32023R1803')
    })

    it('should have correct CELEX for IFRS EN (32023R1803)', () => {
      const englishUrl = EXPECTED_SOURCES.EU_IFRS_EN
      expect(englishUrl).toContain('32023R1803')
    })

    it('should have DE language for IFRS DE', () => {
      const germanUrl = EXPECTED_SOURCES.EU_IFRS_DE
      expect(germanUrl).toContain('/DE/')
    })

    it('should have EN language for IFRS EN', () => {
      const englishUrl = EXPECTED_SOURCES.EU_IFRS_EN
      expect(englishUrl).toContain('/EN/')
    })

    it('should have EFRAG URL for endorsement status', () => {
      const efragUrl = EXPECTED_SOURCES.EFRAG_ENDORSEMENT
      expect(efragUrl).toMatch(/^https:\/\/www\.efrag\.org/)
    })

    it('should have ENISA URL for secure development', () => {
      const enisaUrl = EXPECTED_SOURCES.ENISA_SECURE_DEV
      expect(enisaUrl).toMatch(/^https:\/\/www\.enisa\.europa\.eu/)
    })

    it('should have NIST URLs for SSDF and CSF', () => {
      const nistKeys = ['NIST_SSDF', 'NIST_CSF'] as const
      for (const key of nistKeys) {
        expect(EXPECTED_SOURCES[key]).toMatch(/nist\.gov/)
      }
    })

    it('should have OECD URL for AI principles', () => {
      const oecdUrl = EXPECTED_SOURCES.OECD_AI
      expect(oecdUrl).toMatch(/oecd\.ai/)
    })

    it('should all be valid HTTPS URLs', () => {
      for (const url of Object.values(EXPECTED_SOURCES)) {
        expect(url).toMatch(/^https:\/\//)
      }
    })
  })
})
|
||||
@@ -1,249 +0,0 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
|
||||
// Mock fetch globally
|
||||
// Replaces the global fetch with a Vitest mock so each test can script the
// upstream response and inspect the outgoing request.
const mockFetch = vi.fn()
global.fetch = mockFetch

// Mock NextRequest and NextResponse.
// NextResponse.json returns a plain `{ data, status }` object (status falls
// back to 200) so assertions can read both fields directly.
vi.mock('next/server', () => {
  class MockNextRequest {
    url: string

    constructor(url: string) {
      this.url = url
    }
  }

  const NextResponse = {
    json(data: unknown, init?: { status?: number }) {
      const status = init?.status || 200
      return { data, status }
    },
  }

  return { NextRequest: MockNextRequest, NextResponse }
})
|
||||
|
||||
// Drives the route's GET handler with a minimal `{ url }` request object and a
// mocked global fetch, then inspects either the outgoing upstream call or the
// `{ data, status }` object produced by the mocked NextResponse.json.
describe('Legal Corpus API Proxy', () => {
  // Upstream scroll result with no points and no further page.
  const EMPTY_PAGE = { result: { points: [], next_page_offset: null } }

  // Queue one successful upstream response whose JSON body is `payload`.
  const respondOk = (payload: unknown) =>
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: () => Promise.resolve(payload),
    })

  // Queue one failing upstream response with the given HTTP status.
  const respondError = (status: number) =>
    mockFetch.mockResolvedValueOnce({
      ok: false,
      status,
    })

  // Import the route (after the mock response is queued) and invoke GET.
  const callGet = async (url: string) => {
    const { GET } = await import('../route')
    const request = { url }
    return (await GET(request as any)) as any
  }

  // Accessors for the first upstream fetch call made by the handler.
  const upstreamUrl = () => mockFetch.mock.calls[0][0]
  const upstreamBody = () => JSON.parse(mockFetch.mock.calls[0][1].body)

  beforeEach(() => {
    mockFetch.mockClear()
  })

  describe('scroll action', () => {
    it('should call Qdrant scroll endpoint with correct collection', async () => {
      respondOk({
        result: {
          points: [
            { id: 'uuid-1', payload: { text: 'DSGVO Artikel 1', regulation_code: 'GDPR' } },
            { id: 'uuid-2', payload: { text: 'DSGVO Artikel 2', regulation_code: 'GDPR' } },
          ],
          next_page_offset: 'uuid-3',
        },
      })

      await callGet('http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce&limit=20')

      expect(mockFetch).toHaveBeenCalledTimes(1)
      expect(upstreamUrl()).toContain('/collections/bp_compliance_ce/points/scroll')

      const sent = upstreamBody()
      expect(sent.limit).toBe(20)
      expect(sent.with_payload).toBe(true)
      expect(sent.with_vector).toBe(false)
    })

    it('should pass offset parameter to Qdrant', async () => {
      respondOk(EMPTY_PAGE)

      await callGet('http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_gesetze&offset=some-uuid')

      expect(upstreamBody().offset).toBe('some-uuid')
    })

    it('should limit chunks to max 100', async () => {
      respondOk(EMPTY_PAGE)

      await callGet('http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce&limit=500')

      expect(upstreamBody().limit).toBe(100)
    })

    it('should apply text_search filter client-side', async () => {
      respondOk({
        result: {
          points: [
            { id: 'uuid-1', payload: { text: 'DSGVO Artikel 1 Datenschutz' } },
            { id: 'uuid-2', payload: { text: 'IFRS Standard 16 Leasing' } },
            { id: 'uuid-3', payload: { text: 'Datenschutz Grundverordnung' } },
          ],
          next_page_offset: null,
        },
      })

      const response = await callGet(
        'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce&text_search=Datenschutz'
      )

      // Should filter to only chunks containing "Datenschutz"
      const { chunks } = response.data
      expect(chunks).toHaveLength(2)
      expect(chunks[0].text).toContain('Datenschutz')
    })

    it('should flatten payload into chunk objects', async () => {
      respondOk({
        result: {
          points: [
            {
              id: 'uuid-1',
              payload: {
                text: 'IFRS 16 Leasing',
                regulation_code: 'EU_IFRS',
                language: 'de',
                celex: '32023R1803',
              },
            },
          ],
          next_page_offset: null,
        },
      })

      const response = await callGet('http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce')

      const [chunk] = response.data.chunks
      expect(chunk.id).toBe('uuid-1')
      expect(chunk.text).toBe('IFRS 16 Leasing')
      expect(chunk.regulation_code).toBe('EU_IFRS')
      expect(chunk.language).toBe('de')
    })

    it('should return next_offset from Qdrant response', async () => {
      respondOk({
        result: { points: [], next_page_offset: 'next-uuid' },
      })

      const response = await callGet('http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce')

      expect(response.data.next_offset).toBe('next-uuid')
    })

    it('should handle Qdrant scroll failure', async () => {
      respondError(404)

      const response = await callGet('http://localhost/api/legal-corpus?action=scroll&collection=nonexistent')

      expect(response.status).toBe(404)
    })

    it('should apply filter when filter_key and filter_value provided', async () => {
      respondOk(EMPTY_PAGE)

      await callGet(
        'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce&filter_key=language&filter_value=de'
      )

      expect(upstreamBody().filter).toEqual({
        must: [{ key: 'language', match: { value: 'de' } }],
      })
    })

    it('should default collection to bp_compliance_gesetze', async () => {
      respondOk(EMPTY_PAGE)

      await callGet('http://localhost/api/legal-corpus?action=scroll')

      expect(upstreamUrl()).toContain('/collections/bp_compliance_gesetze/')
    })
  })

  describe('collection-count action', () => {
    it('should return points_count from Qdrant collection info', async () => {
      respondOk({
        result: { points_count: 55053 },
      })

      const response = await callGet(
        'http://localhost/api/legal-corpus?action=collection-count&collection=bp_compliance_ce'
      )

      expect(response.data.count).toBe(55053)
    })

    it('should return 0 when Qdrant is unavailable', async () => {
      respondError(500)

      const response = await callGet(
        'http://localhost/api/legal-corpus?action=collection-count&collection=bp_compliance_ce'
      )

      expect(response.data.count).toBe(0)
    })

    it('should default to bp_compliance_gesetze collection', async () => {
      respondOk({ result: { points_count: 1234 } })

      await callGet('http://localhost/api/legal-corpus?action=collection-count')

      expect(upstreamUrl()).toContain('/collections/bp_compliance_gesetze')
    })
  })
})
|
||||
@@ -66,99 +66,6 @@ export async function GET(request: NextRequest) {
|
||||
url += `/traceability?chunk_id=${encodeURIComponent(chunkId || '')}&regulation=${encodeURIComponent(regulation || '')}`
|
||||
break
|
||||
}
|
||||
case 'scroll': {
  // Pages through the points of one Qdrant collection (payload only, no
  // vectors) and returns them flattened as `chunks`. Optional query params:
  //   collection               - defaults to 'bp_compliance_gesetze'
  //   limit                    - page size, default 20, clamped to 1..100
  //   offset                   - cursor taken from a previous `next_offset`
  //   filter_key/filter_value  - exact-match payload filter (both required)
  //   text_search              - case-insensitive substring filter on the page
  const collection = searchParams.get('collection') || 'bp_compliance_gesetze'
  const offsetParam = searchParams.get('offset')
  const filterKey = searchParams.get('filter_key')
  const filterValue = searchParams.get('filter_value')
  const textSearch = searchParams.get('text_search')

  // A non-numeric `limit` parses to NaN, which JSON.stringify would emit as
  // `null`; fall back to the default instead. Zero and negative values are
  // raised to 1 so the upstream request always carries a usable page size.
  const parsedLimit = parseInt(searchParams.get('limit') || '20', 10)
  const limit = Number.isNaN(parsedLimit) ? 20 : Math.min(Math.max(parsedLimit, 1), 100)

  const scrollBody: Record<string, unknown> = {
    limit,
    with_payload: true,
    with_vector: false,
  }
  if (offsetParam) {
    // Query parameters are always strings. A cursor made only of digits is
    // sent back as a JSON number; anything else (e.g. a UUID) is forwarded
    // unchanged. Digit strings beyond the safe-integer range stay strings.
    // NOTE(review): assumes Qdrant point IDs are unsigned integers or UUID
    // strings - confirm against the Qdrant version in use.
    const numericOffset = Number(offsetParam)
    scrollBody.offset =
      /^\d+$/.test(offsetParam) && Number.isSafeInteger(numericOffset) ? numericOffset : offsetParam
  }
  if (filterKey && filterValue) {
    scrollBody.filter = {
      must: [{ key: filterKey, match: { value: filterValue } }],
    }
  }

  const scrollRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(collection)}/points/scroll`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(scrollBody),
    cache: 'no-store',
  })
  if (!scrollRes.ok) {
    // Surface the upstream status code to the caller.
    return NextResponse.json({ error: 'Qdrant scroll failed' }, { status: scrollRes.status })
  }
  const scrollData = await scrollRes.json()

  // Flatten each point to `{ id, ...payload }`.
  // NOTE(review): a payload field named `id` overrides the point id here.
  const points = (scrollData.result?.points || []).map(
    (p: { id: string | number; payload?: Record<string, unknown> }) => ({
      id: p.id,
      ...p.payload,
    })
  )

  // Client-side text search filter. It runs after paging, so `chunks` can be
  // shorter than the page while `total_in_page` reports the unfiltered size.
  let filtered = points
  if (textSearch && textSearch.trim()) {
    const term = textSearch.toLowerCase()
    filtered = points.filter((p: Record<string, unknown>) => {
      const text = String(p.text || p.content || p.chunk_text || '')
      return text.toLowerCase().includes(term)
    })
  }

  return NextResponse.json({
    chunks: filtered,
    // `??` instead of `||` so a numeric cursor of 0 is not collapsed to null.
    next_offset: scrollData.result?.next_page_offset ?? null,
    total_in_page: points.length,
  })
}
|
||||
case 'regulation-counts-batch': {
  // Counts, per requested regulation id, how many points in the collection
  // carry that `regulation_id`. Ids are processed in groups of 10 concurrent
  // requests; an id whose request fails or is rejected is left out of `counts`.
  const targetCollection = searchParams.get('collection') || 'bp_compliance_gesetze'
  // Accept qdrant_ids (actual regulation_id values in Qdrant payload)
  const rawIds = searchParams.get('qdrant_ids') || ''
  const qdrantIds = rawIds.split(',').filter(Boolean)
  const results: Record<string, number> = {}
  const BATCH_SIZE = 10
  const countUrl = `${QDRANT_URL}/collections/${encodeURIComponent(targetCollection)}/points/count`

  // Fetch the exact count for one regulation id and record it on success.
  const countOne = async (qid: string): Promise<void> => {
    try {
      const res = await fetch(countUrl, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          filter: { must: [{ key: 'regulation_id', match: { value: qid } }] },
          exact: true,
        }),
        cache: 'no-store',
      })
      if (!res.ok) {
        return
      }
      const data = await res.json()
      results[qid] = data.result?.count || 0
    } catch {
      /* skip failed counts */
    }
  }

  for (let start = 0; start < qdrantIds.length; start += BATCH_SIZE) {
    const batch = qdrantIds.slice(start, start + BATCH_SIZE)
    await Promise.all(batch.map((qid) => countOne(qid)))
  }
  return NextResponse.json({ counts: results })
}
|
||||
case 'collection-count': {
  // Reports `points_count` from the Qdrant collection info endpoint; answers
  // `{ count: 0 }` when the upstream request is not OK or the field is missing.
  const collectionName = searchParams.get('collection') || 'bp_compliance_gesetze'
  const infoUrl = `${QDRANT_URL}/collections/${encodeURIComponent(collectionName)}`
  const infoRes = await fetch(infoUrl, { cache: 'no-store' })

  let count = 0
  if (infoRes.ok) {
    const info = await infoRes.json()
    count = info.result?.points_count || 0
  }
  return NextResponse.json({ count })
}
|
||||
default:
|
||||
return NextResponse.json({ error: 'Unknown action' }, { status: 400 })
|
||||
}
|
||||
|
||||
@@ -1,19 +1,8 @@
|
||||
import type { Metadata } from 'next'
|
||||
import localFont from 'next/font/local'
|
||||
import { Noto_Sans } from 'next/font/google'
|
||||
import { Inter } from 'next/font/google'
|
||||
import './globals.css'
|
||||
|
||||
const inter = localFont({
|
||||
src: '../public/fonts/Inter-VariableFont.woff2',
|
||||
variable: '--font-inter',
|
||||
display: 'swap',
|
||||
})
|
||||
|
||||
const notoSans = Noto_Sans({
|
||||
subsets: ['latin', 'latin-ext'],
|
||||
variable: '--font-noto-sans',
|
||||
display: 'swap',
|
||||
})
|
||||
const inter = Inter({ subsets: ['latin'] })
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: 'BreakPilot Admin Lehrer KI',
|
||||
@@ -27,7 +16,7 @@ export default function RootLayout({
|
||||
}) {
|
||||
return (
|
||||
<html lang="de">
|
||||
<body className={`${inter.className} ${notoSans.variable}`}>{children}</body>
|
||||
<body className={inter.className}>{children}</body>
|
||||
</html>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
import Link from 'next/link'
|
||||
import { useState, useEffect } from 'react'
|
||||
|
||||
export type AIToolId = 'test-quality' | 'gpu' | 'ocr-compare' | 'ocr-labeling' | 'rag-pipeline' | 'magic-help'
|
||||
export type AIToolId = 'llm-compare' | 'test-quality' | 'gpu' | 'ocr-compare' | 'ocr-labeling' | 'rag-pipeline' | 'magic-help'
|
||||
|
||||
export interface AIToolModule {
|
||||
id: AIToolId
|
||||
@@ -25,6 +25,13 @@ export interface AIToolModule {
|
||||
}
|
||||
|
||||
export const AI_TOOLS_MODULES: AIToolModule[] = [
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
href: '/ai/llm-compare',
|
||||
description: 'KI-Provider vergleichen',
|
||||
icon: '⚖️',
|
||||
},
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
@@ -86,6 +93,13 @@ export interface AIToolsSidebarResponsiveProps extends AIToolsSidebarProps {
|
||||
// Icons für die Tools
|
||||
const ToolIcon = ({ id }: { id: string }) => {
|
||||
switch (id) {
|
||||
case 'llm-compare':
|
||||
return (
|
||||
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
|
||||
d="M3 6l3 1m0 0l-3 9a5.002 5.002 0 006.001 0M6 7l3 9M6 7l6-2m6 2l3-1m-3 1l-3 9a5.002 5.002 0 006.001 0M18 7l3 9m-3-9l-6-2m0-2v2m0 16V5m0 16H9m3 0h3" />
|
||||
</svg>
|
||||
)
|
||||
case 'test-quality':
|
||||
return (
|
||||
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -214,6 +228,8 @@ export function AIToolsSidebar({
|
||||
<div className="flex items-center gap-2 text-xs">
|
||||
<span title="GPU Infrastruktur">🖥️</span>
|
||||
<span className="text-slate-400">→</span>
|
||||
<span title="LLM Vergleich">⚖️</span>
|
||||
<span className="text-slate-400">→</span>
|
||||
<span title="Test Quality">🧪</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -225,6 +241,9 @@ export function AIToolsSidebar({
|
||||
{/* Quick Info zum aktuellen Tool */}
|
||||
<div className="pt-2 border-t border-slate-200 dark:border-gray-700">
|
||||
<div className="text-xs text-slate-500 dark:text-slate-400 px-1">
|
||||
{currentTool === 'llm-compare' && (
|
||||
<span>Vergleichen Sie LLM-Antworten verschiedener Provider</span>
|
||||
)}
|
||||
{currentTool === 'test-quality' && (
|
||||
<span>Ueberwachen Sie die Qualitaet der KI-Ausgaben</span>
|
||||
)}
|
||||
@@ -368,6 +387,11 @@ export function AIToolsSidebarResponsive({
|
||||
<span className="text-xs text-slate-500 mt-1">GPU</span>
|
||||
</div>
|
||||
<span className="text-slate-400">→</span>
|
||||
<div className="flex flex-col items-center">
|
||||
<span className="text-2xl">⚖️</span>
|
||||
<span className="text-xs text-slate-500 mt-1">LLM</span>
|
||||
</div>
|
||||
<span className="text-slate-400">→</span>
|
||||
<div className="flex flex-col items-center">
|
||||
<span className="text-2xl">🧪</span>
|
||||
<span className="text-xs text-slate-500 mt-1">BQAS</span>
|
||||
@@ -381,6 +405,11 @@ export function AIToolsSidebarResponsive({
|
||||
{/* Quick Info */}
|
||||
<div className="pt-4 border-t border-slate-200 dark:border-gray-700">
|
||||
<div className="text-sm text-slate-600 dark:text-slate-400 p-3 bg-slate-50 dark:bg-gray-800 rounded-xl">
|
||||
{currentTool === 'llm-compare' && (
|
||||
<>
|
||||
<strong className="text-slate-700 dark:text-slate-300">Aktuell:</strong> LLM-Antworten verschiedener Provider vergleichen
|
||||
</>
|
||||
)}
|
||||
{currentTool === 'test-quality' && (
|
||||
<>
|
||||
<strong className="text-slate-700 dark:text-slate-300">Aktuell:</strong> Qualitaet der KI-Ausgaben ueberwachen
|
||||
|
||||
@@ -1,236 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { useGridEditor } from './useGridEditor'
|
||||
import { GridToolbar } from './GridToolbar'
|
||||
import { GridTable } from './GridTable'
|
||||
import { GridImageOverlay } from './GridImageOverlay'
|
||||
|
||||
/** Props accepted by {@link GridEditor}. */
interface GridEditorProps {
  /** OCR session whose grid is edited; `null` renders a placeholder instead of the editor. */
  sessionId: string | null
  /** Optional callback run by the "Fertig" button, after saving when there are unsaved edits. */
  onNext?: () => void
}

/** Swatch colors for the per-color word counts in the summary bar, keyed by color name. */
const COLOR_SWATCHES: Record<string, string> = {
  red: '#dc2626',
  blue: '#2563eb',
  green: '#16a34a',
  orange: '#ea580c',
  purple: '#9333ea',
  yellow: '#ca8a04',
}

/** Swatch used for color names that have no entry in COLOR_SWATCHES. */
const FALLBACK_SWATCH = '#6b7280'

/**
 * Editor for the structured grid of one OCR session.
 *
 * Renders, depending on state: a "no session" placeholder, a loading spinner,
 * an error box with retry, an empty state with a build button, or the full
 * editor (summary bar, toolbar, optional image overlay, one table per zone,
 * shortcut hints and an optional "Fertig" button).
 *
 * All grid state and mutations come from the `useGridEditor` hook; this
 * component only wires them to the UI and to global keyboard shortcuts.
 */
export function GridEditor({ sessionId, onNext }: GridEditorProps) {
  const {
    grid,
    loading,
    saving,
    error,
    dirty,
    selectedCell,
    setSelectedCell,
    buildGrid,
    loadGrid,
    saveGrid,
    updateCellText,
    toggleColumnBold,
    toggleRowHeader,
    undo,
    redo,
    canUndo,
    canRedo,
    getAdjacentCell,
  } = useGridEditor(sessionId)

  const [showOverlay, setShowOverlay] = useState(false)

  // Load grid on mount (and whenever the session changes)
  useEffect(() => {
    if (sessionId) {
      loadGrid()
    }
  }, [sessionId, loadGrid])

  // Keyboard shortcuts: Ctrl/Cmd+Z undo, Ctrl/Cmd+Shift+Z or Ctrl/Cmd+Y redo, Ctrl/Cmd+S save
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (!(e.metaKey || e.ctrlKey)) return

      // With Shift (or Caps Lock) held the browser reports 'Z', not 'z'.
      // Comparing the raw key meant the redo shortcut could never match.
      const key = e.key?.toLowerCase()

      if (key === 'z') {
        e.preventDefault()
        if (e.shiftKey) {
          redo()
        } else {
          undo()
        }
      } else if (key === 'y') {
        // The hint row below advertises Ctrl+Z/Y, so Y has to redo as well.
        e.preventDefault()
        redo()
      } else if (key === 's') {
        e.preventDefault()
        saveGrid()
      }
    }
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  }, [undo, redo, saveGrid])

  const handleNavigate = useCallback(
    (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => {
      const target = getAdjacentCell(cellId, direction)
      if (target) {
        setSelectedCell(target)
        // Focus the input after React has rendered the new selection
        setTimeout(() => {
          const el = document.getElementById(`cell-${target}`)
          if (el) {
            el.focus()
            if (el instanceof HTMLInputElement) el.select()
          }
        }, 0)
      }
    },
    [getAdjacentCell, setSelectedCell],
  )

  if (!sessionId) {
    return (
      <div className="text-center py-12 text-gray-400">
        Keine Session ausgewaehlt.
      </div>
    )
  }

  if (loading) {
    return (
      <div className="flex items-center justify-center py-16">
        <div className="flex items-center gap-3 text-gray-500 dark:text-gray-400">
          <svg className="w-5 h-5 animate-spin" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
          Grid wird aufgebaut...
        </div>
      </div>
    )
  }

  if (error) {
    return (
      <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-4">
        <p className="text-sm text-red-700 dark:text-red-300">
          Fehler: {error}
        </p>
        <button
          onClick={buildGrid}
          className="mt-2 text-xs px-3 py-1.5 bg-red-600 text-white rounded hover:bg-red-700"
        >
          Erneut versuchen
        </button>
      </div>
    )
  }

  if (!grid || !grid.zones.length) {
    return (
      <div className="text-center py-12">
        <p className="text-gray-400 mb-4">Kein Grid vorhanden.</p>
        <button
          onClick={buildGrid}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 text-sm"
        >
          Grid aus OCR-Ergebnissen erstellen
        </button>
      </div>
    )
  }

  return (
    <div className="space-y-4">
      {/* Summary bar */}
      <div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
        <span>{grid.summary.total_zones} Zone(n)</span>
        <span>{grid.summary.total_columns} Spalten</span>
        <span>{grid.summary.total_rows} Zeilen</span>
        <span>{grid.summary.total_cells} Zellen</span>
        {grid.boxes_detected > 0 && (
          <span className="text-amber-600 dark:text-amber-400">
            {grid.boxes_detected} Box(en) erkannt
          </span>
        )}
        {grid.summary.color_stats && Object.entries(grid.summary.color_stats)
          .filter(([name]) => name !== 'black')
          .map(([name, count]) => (
            <span key={name} className="inline-flex items-center gap-1">
              <span
                className="w-2 h-2 rounded-full"
                style={{ backgroundColor: COLOR_SWATCHES[name] || FALLBACK_SWATCH }}
              />
              <span>{count} {name}</span>
            </span>
          ))
        }
        {(grid.summary.recovered_colored ?? 0) > 0 && (
          <span className="text-purple-600 dark:text-purple-400">
            +{grid.summary.recovered_colored} recovered
          </span>
        )}
        <span className="text-gray-400">
          {grid.duration_seconds.toFixed(1)}s
        </span>
      </div>

      {/* Toolbar */}
      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2">
        <GridToolbar
          dirty={dirty}
          saving={saving}
          canUndo={canUndo}
          canRedo={canRedo}
          showOverlay={showOverlay}
          onSave={saveGrid}
          onUndo={undo}
          onRedo={redo}
          onRebuild={buildGrid}
          // Functional update: never toggles from a stale closure value
          onToggleOverlay={() => setShowOverlay((visible) => !visible)}
        />
      </div>

      {/* Image overlay */}
      {showOverlay && (
        <GridImageOverlay sessionId={sessionId} grid={grid} />
      )}

      {/* Zone tables */}
      <div className="space-y-4">
        {grid.zones.map((zone) => (
          <div
            key={zone.zone_index}
            className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden"
          >
            <GridTable
              zone={zone}
              layoutMetrics={grid.layout_metrics}
              selectedCell={selectedCell}
              onSelectCell={setSelectedCell}
              onCellTextChange={updateCellText}
              onToggleColumnBold={toggleColumnBold}
              onToggleRowHeader={toggleRowHeader}
              onNavigate={handleNavigate}
            />
          </div>
        ))}
      </div>

      {/* Tip */}
      <div className="text-[11px] text-gray-400 dark:text-gray-500 flex items-center gap-4">
        <span>Tab: naechste Zelle</span>
        <span>Enter: Zeile runter</span>
        <span>Spalte fett: Klick auf Spaltenkopf</span>
        <span>Header: Klick auf Zeilennummer</span>
        <span>Ctrl+Z/Y: Undo/Redo</span>
        <span>Ctrl+S: Speichern</span>
      </div>

      {/* Next step button */}
      {onNext && (
        <div className="flex justify-end">
          <button
            onClick={async () => {
              // Persist pending edits before handing control to the next step
              if (dirty) await saveGrid()
              onNext()
            }}
            className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700 transition-colors"
          >
            Fertig
          </button>
        </div>
      )}
    </div>
  )
}
|
||||
@@ -1,98 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import type { StructuredGrid } from './types'
|
||||
|
||||
/** Base path under which the Klausur backend is reached. */
const KLAUSUR_API = '/klausur-api'

/** Props accepted by {@link GridImageOverlay}. */
interface GridImageOverlayProps {
  /** Session whose cropped scan image is shown underneath the overlay. */
  sessionId: string
  /** Grid whose zones, columns and rows are drawn over the image. */
  grid: StructuredGrid
}

/** Border/fill color pairs, cycled through by zone index. */
const ZONE_COLORS = [
  { border: 'rgba(20,184,166,0.7)', fill: 'rgba(20,184,166,0.05)' }, // teal
  { border: 'rgba(245,158,11,0.7)', fill: 'rgba(245,158,11,0.05)' }, // amber
  { border: 'rgba(99,102,241,0.7)', fill: 'rgba(99,102,241,0.05)' }, // indigo
  { border: 'rgba(236,72,153,0.7)', fill: 'rgba(236,72,153,0.05)' }, // pink
]

/**
 * Shows the session's cropped scan with the detected grid drawn on top.
 *
 * The SVG uses the grid's image dimensions as its viewBox, so all pixel
 * coordinates from the grid (`bbox_px`, `x_min_px`, `y_min_px`) can be used
 * unscaled while the overlay stretches with the rendered image.
 */
export function GridImageOverlay({ sessionId, grid }: GridImageOverlayProps) {
  // Escape the id: it becomes a URL path segment, and characters such as
  // '/', '?' or '#' would otherwise change which resource is requested.
  const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${encodeURIComponent(sessionId)}/image/cropped`

  return (
    <div className="relative w-full overflow-auto border border-gray-200 dark:border-gray-700 rounded-lg bg-gray-100 dark:bg-gray-900">
      <div className="relative inline-block">
        {/* Source image */}
        {/* eslint-disable-next-line @next/next/no-img-element */}
        <img
          src={imgUrl}
          alt="OCR Scan"
          className="block max-w-full"
          style={{ imageRendering: 'auto' }}
        />

        {/* SVG overlay */}
        <svg
          className="absolute inset-0 w-full h-full pointer-events-none"
          viewBox={`0 0 ${grid.image_width} ${grid.image_height}`}
          preserveAspectRatio="xMinYMin meet"
        >
          {grid.zones.map((zone) => {
            const colors = ZONE_COLORS[zone.zone_index % ZONE_COLORS.length]
            const b = zone.bbox_px

            return (
              <g key={zone.zone_index}>
                {/* Zone border: solid and thicker for boxes, dashed otherwise */}
                <rect
                  x={b.x} y={b.y} width={b.w} height={b.h}
                  fill={colors.fill}
                  stroke={colors.border}
                  strokeWidth={zone.zone_type === 'box' ? 3 : 1.5}
                  strokeDasharray={zone.zone_type === 'box' ? undefined : '6 3'}
                />

                {/* Column separators (the first column's left edge is the zone border) */}
                {zone.columns.slice(1).map((col) => (
                  <line
                    key={`col-${col.index}`}
                    x1={col.x_min_px} y1={b.y}
                    x2={col.x_min_px} y2={b.y + b.h}
                    stroke={colors.border}
                    strokeWidth={1}
                    strokeDasharray="4 2"
                  />
                ))}

                {/* Row separators (the first row's top edge is the zone border) */}
                {zone.rows.slice(1).map((row) => (
                  <line
                    key={`row-${row.index}`}
                    x1={b.x} y1={row.y_min_px}
                    x2={b.x + b.w} y2={row.y_min_px}
                    stroke={colors.border}
                    strokeWidth={0.5}
                    strokeDasharray="3 3"
                    opacity={0.5}
                  />
                ))}

                {/* Zone label */}
                <text
                  x={b.x + 4} y={b.y + 14}
                  fill={colors.border}
                  fontSize={12}
                  fontWeight="bold"
                  fontFamily="monospace"
                >
                  {zone.zone_type === 'box' ? 'BOX' : 'CONTENT'} Z{zone.zone_index}
                  {' '}({zone.columns.length}x{zone.rows.length})
                </text>
              </g>
            )
          })}
        </svg>
      </div>
    </div>
  )
}
|
||||
@@ -1,431 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { GridZone, LayoutMetrics } from './types'
|
||||
|
||||
/** Props accepted by {@link GridTable}. */
interface GridTableProps {
  /** Zone to render: its columns, rows and cells. */
  zone: GridZone
  /** Optional page-level measurements used for scaling, row height and font size. */
  layoutMetrics?: LayoutMetrics
  /** Id of the currently selected cell, or `null` when nothing is selected. */
  selectedCell: string | null
  onSelectCell: (cellId: string) => void
  onCellTextChange: (cellId: string, text: string) => void
  onToggleColumnBold: (zoneIndex: number, colIndex: number) => void
  onToggleRowHeader: (zoneIndex: number, rowIndex: number) => void
  onNavigate: (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => void
}

/** A single cell of a zone. */
type ZoneCell = GridZone['cells'][number]

/** Gutter width for row numbers (px). */
const ROW_NUM_WIDTH = 36

/** Minimum column width in px so columns remain usable. */
const MIN_COL_WIDTH = 40

/** Minimum row height in px. */
const MIN_ROW_HEIGHT = 26

/** Smallest column width, in unscaled source pixels, that a drag-resize may produce. */
const MIN_DRAG_COL_WIDTH_PX = 20

/**
 * Editable CSS-grid table for one zone of the structured grid.
 *
 * Column widths and row heights start from the OCR measurements, scaled to the
 * container width, and can be adjusted by dragging the handles on column
 * headers and row numbers. Clicking a column header toggles bold for the
 * column; clicking a row number toggles the row's header flag.
 */
export function GridTable({
  zone,
  layoutMetrics,
  selectedCell,
  onSelectCell,
  onCellTextChange,
  onToggleColumnBold,
  onToggleRowHeader,
  onNavigate,
}: GridTableProps) {
  const containerRef = useRef<HTMLDivElement>(null)
  const [containerWidth, setContainerWidth] = useState(0)

  // ----------------------------------------------------------------
  // Observe container width for scaling
  // ----------------------------------------------------------------
  useEffect(() => {
    const el = containerRef.current
    if (!el) return
    const ro = new ResizeObserver(([entry]) => {
      setContainerWidth(entry.contentRect.width)
    })
    ro.observe(el)
    return () => ro.disconnect()
  }, [])

  // ----------------------------------------------------------------
  // Drag plumbing shared by column and row resize
  // ----------------------------------------------------------------
  // Holds the teardown of the drag in progress so it can also run on unmount;
  // otherwise the document listeners and the body cursor override would leak.
  const dragCleanupRef = useRef<(() => void) | null>(null)

  useEffect(() => () => dragCleanupRef.current?.(), [])

  const beginDrag = useCallback(
    (cursor: 'col-resize' | 'row-resize', onMove: (e: MouseEvent) => void) => {
      // Defensive: end any drag that never received its mouseup
      dragCleanupRef.current?.()

      const stop = () => {
        document.removeEventListener('mousemove', onMove)
        document.removeEventListener('mouseup', stop)
        document.body.style.cursor = ''
        document.body.style.userSelect = ''
        dragCleanupRef.current = null
      }
      dragCleanupRef.current = stop

      document.body.style.cursor = cursor
      document.body.style.userSelect = 'none'
      document.addEventListener('mousemove', onMove)
      document.addEventListener('mouseup', stop)
    },
    [],
  )

  // ----------------------------------------------------------------
  // Compute column widths from OCR measurements
  // ----------------------------------------------------------------
  const zoneWidthPx = zone.bbox_px.w || layoutMetrics?.page_width_px || 1
  // Only scale once the container is wider than the gutter; a zero or negative
  // scale would collapse every size and break the drag maths (division by scale).
  const scale = containerWidth > ROW_NUM_WIDTH ? (containerWidth - ROW_NUM_WIDTH) / zoneWidthPx : 1

  // Column widths in original pixels, then scaled to container
  const [colWidthOverrides, setColWidthOverrides] = useState<number[] | null>(null)

  const columnWidthsPx = zone.columns.map((col) => col.x_max_px - col.x_min_px)

  // Overrides recorded for a different column count no longer line up with
  // zone.columns (the zone can change while this component stays mounted).
  const activeOverrides =
    colWidthOverrides && colWidthOverrides.length === zone.columns.length
      ? colWidthOverrides
      : null

  const effectiveColWidths = (activeOverrides ?? columnWidthsPx).map(
    (w) => Math.max(MIN_COL_WIDTH, w * scale),
  )

  // ----------------------------------------------------------------
  // Compute row heights from OCR measurements
  // ----------------------------------------------------------------
  const avgRowHeightPx = layoutMetrics?.avg_row_height_px ?? 30
  const [rowHeightOverrides, setRowHeightOverrides] = useState<Map<number, number>>(new Map())

  const getRowHeight = (rowIndex: number, isHeader: boolean): number => {
    if (rowHeightOverrides.has(rowIndex)) {
      return rowHeightOverrides.get(rowIndex)!
    }
    const row = zone.rows.find((r) => r.index === rowIndex)
    if (!row) return Math.max(MIN_ROW_HEIGHT, avgRowHeightPx * scale)

    if (isHeader) {
      // Headers keep their measured height
      const measuredH = row.y_max_px - row.y_min_px
      return Math.max(MIN_ROW_HEIGHT, measuredH * scale)
    }
    // Content rows use average for uniformity
    return Math.max(MIN_ROW_HEIGHT, avgRowHeightPx * scale)
  }

  // ----------------------------------------------------------------
  // Font size from layout metrics
  // ----------------------------------------------------------------
  const baseFontSize = layoutMetrics?.font_size_suggestion_px
    ? Math.max(11, layoutMetrics.font_size_suggestion_px * scale)
    : 13

  // ----------------------------------------------------------------
  // Keyboard navigation
  // ----------------------------------------------------------------
  const handleKeyDown = useCallback(
    (e: React.KeyboardEvent, cellId: string) => {
      if (e.key === 'Tab') {
        e.preventDefault()
        onNavigate(cellId, e.shiftKey ? 'left' : 'right')
      } else if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault()
        onNavigate(cellId, 'down')
      } else if (e.key === 'ArrowUp' && e.altKey) {
        e.preventDefault()
        onNavigate(cellId, 'up')
      } else if (e.key === 'ArrowDown' && e.altKey) {
        e.preventDefault()
        onNavigate(cellId, 'down')
      } else if (e.key === 'Escape') {
        ;(e.target as HTMLElement).blur()
      }
    },
    [onNavigate],
  )

  // ----------------------------------------------------------------
  // Cell lookup
  // ----------------------------------------------------------------
  const cellMap = new Map<string, ZoneCell>()
  for (const cell of zone.cells) {
    cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
  }

  /**
   * First word box of a cell that carries a non-black color, or undefined.
   * The color stripe and its tooltip both read from this one lookup, so they
   * can never describe different word boxes.
   */
  const getColoredWord = (cell: ZoneCell | undefined) =>
    cell?.word_boxes?.find((wb) =>
      Boolean(wb.color_name && wb.color_name !== 'black' && wb.color),
    )

  // ----------------------------------------------------------------
  // Column resize (drag)
  // ----------------------------------------------------------------
  const handleColResizeStart = useCallback(
    (colIndex: number, startX: number) => {
      const baseWidths = activeOverrides ?? [...columnWidthsPx]
      const nextIndex = colIndex + 1

      beginDrag('col-resize', (e: MouseEvent) => {
        let deltaPx = (e.clientX - startX) / scale
        const newWidths = [...baseWidths]

        if (nextIndex < newWidths.length) {
          // Steal from the next column to keep the total constant. The delta is
          // clamped once so neither column drops below the minimum; clamping the
          // two widths independently would let the total drift.
          const maxDelta = Math.max(0, baseWidths[nextIndex] - MIN_DRAG_COL_WIDTH_PX)
          const minDelta = Math.min(0, MIN_DRAG_COL_WIDTH_PX - baseWidths[colIndex])
          deltaPx = Math.min(maxDelta, Math.max(minDelta, deltaPx))
          newWidths[colIndex] = baseWidths[colIndex] + deltaPx
          newWidths[nextIndex] = baseWidths[nextIndex] - deltaPx
        } else {
          newWidths[colIndex] = Math.max(MIN_DRAG_COL_WIDTH_PX, baseWidths[colIndex] + deltaPx)
        }

        setColWidthOverrides(newWidths)
      })
    },
    [activeOverrides, columnWidthsPx, scale, beginDrag],
  )

  // ----------------------------------------------------------------
  // Row resize (drag)
  // ----------------------------------------------------------------
  const handleRowResizeStart = useCallback(
    (rowIndex: number, startY: number, currentHeight: number) => {
      beginDrag('row-resize', (e: MouseEvent) => {
        const delta = e.clientY - startY
        const newH = Math.max(MIN_ROW_HEIGHT, currentHeight + delta)
        setRowHeightOverrides((prev) => {
          const next = new Map(prev)
          next.set(rowIndex, newH)
          return next
        })
      })
    },
    [beginDrag],
  )

  const isBoxZone = zone.zone_type === 'box'
  const numCols = zone.columns.length

  // CSS Grid template for columns: row-number gutter + proportional columns
  const gridTemplateCols = `${ROW_NUM_WIDTH}px ${effectiveColWidths.map((w) => `${w.toFixed(1)}px`).join(' ')}`

  return (
    <div
      ref={containerRef}
      className={`overflow-x-auto ${isBoxZone ? 'border-2 border-gray-400 dark:border-gray-500 rounded-lg' : ''}`}
    >
      {/* Zone label */}
      <div className="flex items-center gap-2 px-2 py-1 text-xs text-gray-500 dark:text-gray-400">
        <span
          className={`inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-[10px] font-medium ${
            isBoxZone
              ? 'bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-800'
              : 'bg-gray-50 dark:bg-gray-800 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-700'
          }`}
        >
          {isBoxZone ? 'Box' : 'Inhalt'} Zone {zone.zone_index}
        </span>
        <span>
          {zone.columns.length} Spalten, {zone.rows.length} Zeilen, {zone.cells.length} Zellen
        </span>
      </div>

      {/* ============================================================ */}
      {/* CSS Grid — column headers                                     */}
      {/* ============================================================ */}
      <div
        style={{
          display: 'grid',
          gridTemplateColumns: gridTemplateCols,
          fontFamily: "var(--font-noto-sans, 'Noto Sans'), 'Inter', system-ui, sans-serif",
          fontSize: `${baseFontSize}px`,
        }}
      >
        {/* Header: row-number corner */}
        <div className="sticky left-0 z-10 px-1 py-1.5 text-[10px] text-gray-400 dark:text-gray-500 border-b border-r border-gray-200 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50" />

        {/* Header: column labels with resize handles */}
        {zone.columns.map((col, ci) => (
          <div
            key={col.index}
            className={`relative px-2 py-1.5 text-xs font-medium border-b border-r border-gray-200 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
              col.bold ? 'text-teal-700 dark:text-teal-300' : 'text-gray-600 dark:text-gray-400'
            }`}
            onClick={() => onToggleColumnBold(zone.zone_index, col.index)}
            title={`Spalte ${col.index + 1} — Klick fuer Fett-Toggle`}
          >
            <div className="flex items-center gap-1 justify-center truncate">
              <span>{col.label}</span>
              {col.bold && (
                <span className="text-[9px] px-1 py-0 rounded bg-teal-100 dark:bg-teal-900/40 text-teal-600 dark:text-teal-400">
                  B
                </span>
              )}
            </div>
            {/* Right-edge resize handle */}
            {ci < numCols - 1 && (
              <div
                className="absolute top-0 right-0 w-[5px] h-full cursor-col-resize hover:bg-teal-400/40 z-20"
                onMouseDown={(e) => {
                  e.stopPropagation()
                  handleColResizeStart(ci, e.clientX)
                }}
                // Stopping mousedown does not stop the click that follows;
                // without this, using the handle would also toggle bold.
                onClick={(e) => e.stopPropagation()}
              />
            )}
          </div>
        ))}

        {/* ============================================================ */}
        {/* Data rows                                                     */}
        {/* ============================================================ */}
        {zone.rows.map((row) => {
          const rowH = getRowHeight(row.index, row.is_header)
          const isSpanning = zone.cells.some(
            (c) => c.row_index === row.index && c.col_type === 'spanning_header',
          )

          return (
            <div key={row.index} style={{ display: 'contents' }}>
              {/* Row number cell */}
              <div
                className={`relative sticky left-0 z-10 flex items-center justify-center text-[10px] border-b border-r border-gray-200 dark:border-gray-700 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
                  row.is_header
                    ? 'bg-blue-50 dark:bg-blue-900/20 text-blue-600 dark:text-blue-400 font-medium'
                    : 'bg-gray-50 dark:bg-gray-800/50 text-gray-400 dark:text-gray-500'
                }`}
                style={{ height: `${rowH}px` }}
                onClick={() => onToggleRowHeader(zone.zone_index, row.index)}
                title={`Zeile ${row.index + 1} — Klick fuer Header-Toggle`}
              >
                {row.index + 1}
                {row.is_header && <span className="block text-[8px]">H</span>}
                {/* Bottom-edge resize handle */}
                <div
                  className="absolute bottom-0 left-0 w-full h-[4px] cursor-row-resize hover:bg-teal-400/40 z-20"
                  onMouseDown={(e) => {
                    e.stopPropagation()
                    handleRowResizeStart(row.index, e.clientY, rowH)
                  }}
                  // Keep the follow-up click from toggling the header flag
                  onClick={(e) => e.stopPropagation()}
                />
              </div>

              {/* Cells — spanning header or normal columns */}
              {isSpanning ? (
                <div
                  className="border-b border-r border-gray-200 dark:border-gray-700 bg-blue-50/50 dark:bg-blue-900/10 flex items-center"
                  style={{
                    gridColumn: `2 / ${numCols + 2}`,
                    height: `${rowH}px`,
                  }}
                >
                  {(() => {
                    const spanCell = zone.cells.find(
                      (c) => c.row_index === row.index && c.col_type === 'spanning_header',
                    )
                    if (!spanCell) return null
                    const cellId = spanCell.cell_id
                    const isSelected = selectedCell === cellId
                    const cellColor = getColoredWord(spanCell)?.color ?? null
                    return (
                      <div className="flex items-center w-full">
                        {cellColor && (
                          <span
                            className="flex-shrink-0 w-1.5 self-stretch rounded-l-sm"
                            style={{ backgroundColor: cellColor }}
                          />
                        )}
                        <input
                          id={`cell-${cellId}`}
                          type="text"
                          value={spanCell.text}
                          onChange={(e) => onCellTextChange(cellId, e.target.value)}
                          onFocus={() => onSelectCell(cellId)}
                          onKeyDown={(e) => handleKeyDown(e, cellId)}
                          className={`w-full px-3 py-1 bg-transparent border-0 outline-none text-center ${
                            isSelected ? 'ring-2 ring-teal-500 ring-inset rounded' : ''
                          }`}
                          style={{ color: cellColor || undefined }}
                          spellCheck={false}
                        />
                      </div>
                    )
                  })()}
                </div>
              ) : (
                zone.columns.map((col) => {
                  const cell = cellMap.get(`${row.index}_${col.index}`)
                  // Grid positions without an OCR cell get a synthetic id
                  const cellId =
                    cell?.cell_id ??
                    `Z${zone.zone_index}_R${String(row.index).padStart(2, '0')}_C${col.index}`
                  const isSelected = selectedCell === cellId
                  const isBold = col.bold || cell?.is_bold
                  const isLowConf = cell && cell.confidence > 0 && cell.confidence < 60
                  const coloredWord = getColoredWord(cell)
                  const cellColor = coloredWord?.color ?? null
                  const hasColoredWords =
                    cell?.word_boxes?.some(
                      (wb) => wb.color_name && wb.color_name !== 'black',
                    ) ?? false

                  return (
                    <div
                      key={col.index}
                      className={`relative border-b border-r border-gray-200 dark:border-gray-700 flex items-center ${
                        isSelected ? 'ring-2 ring-teal-500 ring-inset z-10' : ''
                      } ${isLowConf ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''} ${
                        row.is_header ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''
                      }`}
                      style={{ height: `${rowH}px` }}
                    >
                      {cellColor && (
                        <span
                          className="flex-shrink-0 w-1.5 self-stretch rounded-l-sm"
                          style={{ backgroundColor: cellColor }}
                          // Name comes from the same word box as the stripe color.
                          // The earlier lookup matched any box whose name was not
                          // 'black', including unnamed ones ("Farbe: undefined").
                          title={`Farbe: ${coloredWord?.color_name}`}
                        />
                      )}
                      {/* Per-word colored display when not editing */}
                      {hasColoredWords && !isSelected ? (
                        <div
                          className={`w-full px-2 cursor-text truncate ${isBold ? 'font-bold' : 'font-normal'}`}
                          onClick={() => {
                            onSelectCell(cellId)
                            // The input only exists after the re-render triggered by selection
                            setTimeout(() => document.getElementById(`cell-${cellId}`)?.focus(), 0)
                          }}
                        >
                          {cell!.word_boxes!.map((wb, i) => (
                            <span
                              key={i}
                              style={
                                wb.color_name && wb.color_name !== 'black'
                                  ? { color: wb.color }
                                  : undefined
                              }
                            >
                              {wb.text}
                              {i < cell!.word_boxes!.length - 1 ? ' ' : ''}
                            </span>
                          ))}
                        </div>
                      ) : (
                        <input
                          id={`cell-${cellId}`}
                          type="text"
                          value={cell?.text ?? ''}
                          onChange={(e) => {
                            // Positions without a backing cell are not editable
                            if (cell) onCellTextChange(cellId, e.target.value)
                          }}
                          onFocus={() => onSelectCell(cellId)}
                          onKeyDown={(e) => handleKeyDown(e, cellId)}
                          className={`w-full px-2 bg-transparent border-0 outline-none ${
                            isBold ? 'font-bold' : 'font-normal'
                          }`}
                          spellCheck={false}
                        />
                      )}
                    </div>
                  )
                })
              )}
            </div>
          )
        })}
      </div>
    </div>
  )
}
|
||||
@@ -1,110 +0,0 @@
|
||||
'use client'
|
||||
|
||||
/** Props for the grid editor toolbar. The toolbar is fully controlled by its parent. */
interface GridToolbarProps {
  /** True when there are unsaved changes; enables the save button. */
  dirty: boolean
  /** True while a save is running; disables the save button and shows the spinner. */
  saving: boolean
  /** Enables the undo button. */
  canUndo: boolean
  /** Enables the redo button. */
  canRedo: boolean
  /** Whether the image overlay toggle is rendered in its active style. */
  showOverlay: boolean
  /** Called when the save button is clicked. */
  onSave: () => void
  /** Called when the undo button is clicked. */
  onUndo: () => void
  /** Called when the redo button is clicked. */
  onRedo: () => void
  /** Called when the "Neu berechnen" button is clicked. */
  onRebuild: () => void
  /** Called when the "Bild-Overlay" toggle is clicked. */
  onToggleOverlay: () => void
}
|
||||
|
||||
/**
 * Toolbar for the grid editor: undo/redo, image-overlay toggle, rebuild and save.
 *
 * The component is stateless; every button simply forwards to the matching
 * callback prop.
 *
 * Every button declares `type="button"` so the toolbar never submits an
 * enclosing form. The icon-only undo/redo buttons carry an `aria-label`, the
 * overlay toggle exposes `aria-pressed`, and the purely decorative icons are
 * hidden from assistive technology.
 */
export function GridToolbar({
  dirty,
  saving,
  canUndo,
  canRedo,
  showOverlay,
  onSave,
  onUndo,
  onRedo,
  onRebuild,
  onToggleOverlay,
}: GridToolbarProps) {
  // Shared by the undo and redo buttons, which only differ in icon and handler.
  const historyButtonClass =
    'p-1.5 rounded hover:bg-gray-100 dark:hover:bg-gray-700 disabled:opacity-30 disabled:cursor-not-allowed'

  return (
    <div className="flex items-center gap-2 flex-wrap">
      {/* Undo / Redo */}
      <div className="flex items-center gap-1 border-r border-gray-200 dark:border-gray-700 pr-2">
        <button
          type="button"
          onClick={onUndo}
          disabled={!canUndo}
          className={historyButtonClass}
          title="Rueckgaengig (Ctrl+Z)"
          aria-label="Rueckgaengig"
        >
          <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
            <path strokeLinecap="round" strokeLinejoin="round" d="M3 10h10a5 5 0 015 5v2M3 10l4-4M3 10l4 4" />
          </svg>
        </button>
        <button
          type="button"
          onClick={onRedo}
          disabled={!canRedo}
          className={historyButtonClass}
          title="Wiederholen (Ctrl+Shift+Z)"
          aria-label="Wiederholen"
        >
          <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
            <path strokeLinecap="round" strokeLinejoin="round" d="M21 10H11a5 5 0 00-5 5v2M21 10l-4-4M21 10l-4 4" />
          </svg>
        </button>
      </div>

      {/* Overlay toggle */}
      <button
        type="button"
        onClick={onToggleOverlay}
        aria-pressed={showOverlay}
        className={`flex items-center gap-1 px-2.5 py-1.5 text-xs rounded-md border transition-colors ${
          showOverlay
            ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-300 dark:border-teal-700 text-teal-700 dark:text-teal-300'
            : 'border-gray-200 dark:border-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700'
        }`}
        title="Grid auf Bild anzeigen"
      >
        <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
          <path strokeLinecap="round" strokeLinejoin="round" d="M4 5a1 1 0 011-1h14a1 1 0 011 1v2a1 1 0 01-1 1H5a1 1 0 01-1-1V5zM4 13a1 1 0 011-1h6a1 1 0 011 1v6a1 1 0 01-1 1H5a1 1 0 01-1-1v-6zM16 13a1 1 0 011-1h2a1 1 0 011 1v6a1 1 0 01-1 1h-2a1 1 0 01-1-1v-6z" />
        </svg>
        Bild-Overlay
      </button>

      {/* Rebuild */}
      <button
        type="button"
        onClick={onRebuild}
        className="flex items-center gap-1 px-2.5 py-1.5 text-xs rounded-md border border-gray-200 dark:border-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors"
        title="Grid neu berechnen"
      >
        <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
          <path strokeLinecap="round" strokeLinejoin="round" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
        </svg>
        Neu berechnen
      </button>

      {/* Spacer pushes the save button to the right edge */}
      <div className="flex-1" />

      {/* Save: disabled when nothing changed or while a save is in flight */}
      <button
        type="button"
        onClick={onSave}
        disabled={!dirty || saving}
        className={`flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
          dirty
            ? 'bg-teal-600 text-white hover:bg-teal-700'
            : 'bg-gray-100 dark:bg-gray-800 text-gray-400 cursor-not-allowed'
        }`}
        title="Speichern (Ctrl+S)"
      >
        {saving ? (
          <svg className="w-3.5 h-3.5 animate-spin" fill="none" viewBox="0 0 24 24" aria-hidden="true">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
        ) : (
          <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
            <path strokeLinecap="round" strokeLinejoin="round" d="M8 7H5a2 2 0 00-2 2v9a2 2 0 002 2h14a2 2 0 002-2V9a2 2 0 00-2-2h-3m-1 4l-3 3m0 0l-3-3m3 3V4" />
          </svg>
        )}
        {saving ? 'Speichert...' : dirty ? 'Speichern' : 'Gespeichert'}
      </button>
    </div>
  )
}
|
||||
@@ -1,6 +0,0 @@
|
||||
export { GridEditor } from './GridEditor'
|
||||
export { GridTable } from './GridTable'
|
||||
export { GridToolbar } from './GridToolbar'
|
||||
export { GridImageOverlay } from './GridImageOverlay'
|
||||
export { useGridEditor } from './useGridEditor'
|
||||
export type * from './types'
|
||||
@@ -1,108 +0,0 @@
|
||||
import type { OcrWordBox } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
// Re-export for convenience
|
||||
export type { OcrWordBox }
|
||||
|
||||
/** Layout metrics derived from OCR word positions for faithful grid reconstruction. */
export interface LayoutMetrics {
  page_width_px: number
  page_height_px: number
  avg_row_height_px: number
  font_size_suggestion_px: number
}

/** A complete structured grid with zones, ready for the Excel-like editor. */
export interface StructuredGrid {
  session_id: string
  image_width: number
  image_height: number
  /** All zones of the page; each zone carries its own columns, rows and cells. */
  zones: GridZone[]
  boxes_detected: number
  summary: GridSummary
  formatting: GridFormatting
  /** Optional; may be absent from a grid payload. */
  layout_metrics?: LayoutMetrics
  duration_seconds: number
  /** Optional flag; presumably set once a user-edited grid was saved — TODO confirm against the API. */
  edited?: boolean
}

/** Aggregate counters describing a structured grid. */
export interface GridSummary {
  total_zones: number
  total_columns: number
  total_rows: number
  total_cells: number
  total_words: number
  recovered_colored?: number
  /** Numeric value per string key; presumably a count per color name — TODO confirm. */
  color_stats?: Record<string, number>
}

/** Grid-level formatting expressed as lists of column and row indices. */
export interface GridFormatting {
  bold_columns: number[]
  header_rows: number[]
}

/** A horizontal zone of the page — either content or a bordered box. */
export interface GridZone {
  zone_index: number
  zone_type: 'content' | 'box'
  bbox_px: BBox
  bbox_pct: BBox
  /** Border description, or null when the zone has none. */
  border: ZoneBorder | null
  word_count: number
  columns: GridColumn[]
  rows: GridRow[]
  cells: GridEditorCell[]
  header_rows: number[]
}

/**
 * Rectangle given as origin (x, y) plus width and height.
 * The unit depends on the field holding it (`bbox_px` vs. `bbox_pct`).
 */
export interface BBox {
  x: number
  y: number
  w: number
  h: number
}

/** Border attributes of a zone. */
export interface ZoneBorder {
  thickness: number
  confidence: number
}

/** One column of a zone with its horizontal extent in pixel and percent fields. */
export interface GridColumn {
  index: number
  label: string
  x_min_px: number
  x_max_px: number
  x_min_pct: number
  x_max_pct: number
  bold: boolean
}

/** One row of a zone with its vertical extent in pixel and percent fields. */
export interface GridRow {
  index: number
  y_min_px: number
  y_max_px: number
  y_min_pct: number
  y_max_pct: number
  is_header: boolean
}

/** A single editable cell, addressed by zone, row and column index. */
export interface GridEditorCell {
  cell_id: string
  zone_index: number
  row_index: number
  col_index: number
  col_type: string
  text: string
  confidence: number
  bbox_px: BBox
  bbox_pct: BBox
  /** Per-word OCR boxes belonging to this cell. */
  word_boxes: OcrWordBox[]
  ocr_engine: string
  is_bold: boolean
}

/** Cell formatting applied by the user in the editor. */
export interface CellFormatting {
  bold: boolean
  fontSize: 'small' | 'normal' | 'large'
  align: 'left' | 'center' | 'right'
}
|
||||
@@ -1,288 +0,0 @@
|
||||
import { useCallback, useRef, useState } from 'react'
|
||||
import type { StructuredGrid, GridZone } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
const MAX_UNDO = 50
|
||||
|
||||
/** State fields of the grid editor; names and types match the state kept by `useGridEditor`. */
export interface GridEditorState {
  /** The loaded grid, or null while nothing has been loaded. */
  grid: StructuredGrid | null
  loading: boolean
  saving: boolean
  /** Last error message, or null when there is none. */
  error: string | null
  /** True when the grid has unsaved changes. */
  dirty: boolean
  /** `cell_id` of the selected cell, if any. */
  selectedCell: string | null
  /** Index of the selected zone, if any. */
  selectedZone: number | null
}
|
||||
|
||||
/**
 * Rejects with a descriptive Error when `res` is not a 2xx response.
 *
 * Uses the `detail` string of a JSON error body when present, otherwise falls
 * back to `HTTP <status>`. Bodies that are not JSON, not an object, or whose
 * `detail` is not a string never cause a secondary exception.
 */
async function ensureOk(res: Response): Promise<void> {
  if (res.ok) return
  const body: unknown = await res.json().catch(() => null)
  const detail =
    body !== null && typeof body === 'object' ? (body as { detail?: unknown }).detail : undefined
  throw new Error(typeof detail === 'string' && detail ? detail : `HTTP ${res.status}`)
}

/**
 * State container for the grid editor of one OCR session.
 *
 * Handles loading/building/saving the grid via the klausur API, local edits
 * (cell text, column bold, row header), a bounded undo/redo history and
 * keyboard-navigation lookups.
 *
 * @param sessionId OCR pipeline session id; while null, load/build/save are no-ops.
 * @returns The editor state plus the actions operating on it. `canUndo` and
 *   `canRedo` are read from refs at render time, so they refresh whenever a
 *   state update (every edit calls `setGrid`) triggers a re-render.
 */
export function useGridEditor(sessionId: string | null) {
  const [grid, setGrid] = useState<StructuredGrid | null>(null)
  const [loading, setLoading] = useState(false)
  const [saving, setSaving] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [dirty, setDirty] = useState(false)
  const [selectedCell, setSelectedCell] = useState<string | null>(null)
  const [selectedZone, setSelectedZone] = useState<number | null>(null)

  // Undo/redo stacks store serialized zone arrays
  const undoStack = useRef<string[]>([])
  const redoStack = useRef<string[]>([])

  // Incremented on every local mutation (edit, undo, redo). saveGrid compares
  // the value before and after its request to detect edits made mid-save.
  const editRevision = useRef(0)

  /** Records `zones` as an undo snapshot, drops the oldest beyond MAX_UNDO and clears redo. */
  const pushUndo = useCallback((zones: GridZone[]) => {
    undoStack.current.push(JSON.stringify(zones))
    if (undoStack.current.length > MAX_UNDO) {
      undoStack.current.shift()
    }
    redoStack.current = []
  }, [])

  /** Replaces the local grid with a server response and resets dirty flag and history. */
  const adoptServerGrid = useCallback((data: StructuredGrid) => {
    setGrid(data)
    setDirty(false)
    undoStack.current = []
    redoStack.current = []
  }, [])

  // ------------------------------------------------------------------
  // Load / Build
  // ------------------------------------------------------------------

  /** Asks the backend to (re)build the grid for the session and adopts the result. */
  const buildGrid = useCallback(async () => {
    if (!sessionId) return
    setLoading(true)
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`,
        { method: 'POST' },
      )
      await ensureOk(res)
      const data: StructuredGrid = await res.json()
      adoptServerGrid(data)
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setLoading(false)
    }
  }, [sessionId, adoptServerGrid])

  /** Loads the stored grid; a 404 response triggers a build instead. */
  const loadGrid = useCallback(async () => {
    if (!sessionId) return
    setLoading(true)
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`,
      )
      if (res.status === 404) {
        // No grid yet — build it
        await buildGrid()
        return
      }
      await ensureOk(res)
      const data: StructuredGrid = await res.json()
      adoptServerGrid(data)
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setLoading(false)
    }
  }, [sessionId, buildGrid, adoptServerGrid])

  // ------------------------------------------------------------------
  // Save
  // ------------------------------------------------------------------

  /**
   * Posts the current grid to the backend.
   *
   * Clears any previous error first. The dirty flag is only cleared when no
   * edit happened while the request was in flight; otherwise the newer edits
   * are still unsaved and the flag must stay set.
   */
  const saveGrid = useCallback(async () => {
    if (!sessionId || !grid) return
    const revisionAtSave = editRevision.current
    setSaving(true)
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/save-grid`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(grid),
        },
      )
      await ensureOk(res)
      if (editRevision.current === revisionAtSave) {
        setDirty(false)
      }
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setSaving(false)
    }
  }, [sessionId, grid])

  // ------------------------------------------------------------------
  // Editing
  // ------------------------------------------------------------------

  /**
   * Shared mutation path: snapshots the current zones for undo, maps every
   * zone through `transform` and marks the grid dirty. No-op without a grid.
   */
  const applyZoneEdit = useCallback(
    (transform: (zone: GridZone) => GridZone) => {
      if (!grid) return
      pushUndo(grid.zones)
      editRevision.current += 1
      setGrid((prev) =>
        prev ? { ...prev, zones: prev.zones.map((zone) => transform(zone)) } : prev,
      )
      setDirty(true)
    },
    [grid, pushUndo],
  )

  /** Sets the text of the cell with id `cellId` (searched in every zone). */
  const updateCellText = useCallback(
    (cellId: string, newText: string) => {
      applyZoneEdit((zone) => ({
        ...zone,
        cells: zone.cells.map((cell) =>
          cell.cell_id === cellId ? { ...cell, text: newText } : cell,
        ),
      }))
    },
    [applyZoneEdit],
  )

  /**
   * Flips the bold flag of one column and applies the new value to all of
   * its cells. An unknown column index results in bold = true for matching cells.
   */
  const toggleColumnBold = useCallback(
    (zoneIndex: number, colIndex: number) => {
      applyZoneEdit((zone) => {
        if (zone.zone_index !== zoneIndex) return zone
        const col = zone.columns.find((c) => c.index === colIndex)
        const newBold = col ? !col.bold : true
        return {
          ...zone,
          columns: zone.columns.map((c) =>
            c.index === colIndex ? { ...c, bold: newBold } : c,
          ),
          cells: zone.cells.map((cell) =>
            cell.col_index === colIndex ? { ...cell, is_bold: newBold } : cell,
          ),
        }
      })
    },
    [applyZoneEdit],
  )

  /** Flips the `is_header` flag of one row inside the given zone. */
  const toggleRowHeader = useCallback(
    (zoneIndex: number, rowIndex: number) => {
      applyZoneEdit((zone) => {
        if (zone.zone_index !== zoneIndex) return zone
        return {
          ...zone,
          rows: zone.rows.map((r) =>
            r.index === rowIndex ? { ...r, is_header: !r.is_header } : r,
          ),
        }
      })
    },
    [applyZoneEdit],
  )

  // ------------------------------------------------------------------
  // Undo / Redo
  // ------------------------------------------------------------------

  /** Restores the most recent undo snapshot and stores the current zones for redo. */
  const undo = useCallback(() => {
    if (!grid || undoStack.current.length === 0) return
    redoStack.current.push(JSON.stringify(grid.zones))
    const prev = undoStack.current.pop()!
    editRevision.current += 1
    setGrid((g) => (g ? { ...g, zones: JSON.parse(prev) } : g))
    setDirty(true)
  }, [grid])

  /** Re-applies the most recently undone snapshot and stores the current zones for undo. */
  const redo = useCallback(() => {
    if (!grid || redoStack.current.length === 0) return
    undoStack.current.push(JSON.stringify(grid.zones))
    const next = redoStack.current.pop()!
    editRevision.current += 1
    setGrid((g) => (g ? { ...g, zones: JSON.parse(next) } : g))
    setDirty(true)
  }, [grid])

  const canUndo = undoStack.current.length > 0
  const canRedo = redoStack.current.length > 0

  // ------------------------------------------------------------------
  // Navigation helpers
  // ------------------------------------------------------------------

  /**
   * Returns the id of the cell next to `cellId` in the given direction within
   * the same zone, or null when the cell is unknown or has no such neighbour.
   */
  const getAdjacentCell = useCallback(
    (cellId: string, direction: 'up' | 'down' | 'left' | 'right'): string | null => {
      if (!grid) return null
      for (const zone of grid.zones) {
        const cell = zone.cells.find((c) => c.cell_id === cellId)
        if (!cell) continue

        let targetRow = cell.row_index
        let targetCol = cell.col_index
        if (direction === 'up') targetRow--
        if (direction === 'down') targetRow++
        if (direction === 'left') targetCol--
        if (direction === 'right') targetCol++

        const target = zone.cells.find(
          (c) => c.row_index === targetRow && c.col_index === targetCol,
        )
        return target?.cell_id ?? null
      }
      return null
    },
    [grid],
  )

  return {
    grid,
    loading,
    saving,
    error,
    dirty,
    selectedCell,
    selectedZone,
    setSelectedCell,
    setSelectedZone,
    buildGrid,
    loadGrid,
    saveGrid,
    updateCellText,
    toggleColumnBold,
    toggleRowHeader,
    undo,
    redo,
    canUndo,
    canRedo,
    getAdjacentCell,
  }
}
|
||||
@@ -194,8 +194,10 @@ export function Sidebar({ onRoleChange }: SidebarProps) {
|
||||
{/* Categories */}
|
||||
<div className="px-2 space-y-1">
|
||||
{visibleCategories.map((category) => {
|
||||
const categoryHref = `/${category.id}`
|
||||
const isCategoryActive = pathname.startsWith(categoryHref)
|
||||
const categoryHref = category.id === 'compliance-sdk' ? '/sdk' : `/${category.id}`
|
||||
const isCategoryActive = category.id === 'compliance-sdk'
|
||||
? category.modules.some(m => pathname.startsWith(m.href))
|
||||
: pathname.startsWith(categoryHref)
|
||||
|
||||
return (
|
||||
<div key={category.id}>
|
||||
|
||||
@@ -1,231 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { OverlayReconstruction } from './OverlayReconstruction'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
type Phase = 'idle' | 'running' | 'compare'
|
||||
|
||||
/**
 * Response of one kombi OCR endpoint as consumed by the compare step.
 * Unlisted fields are allowed through the index signatures.
 */
interface KombiResult {
  /** Recognised cells; passed on to the overlay reconstruction. */
  cells: GridCell[]
  image_width: number
  image_height: number
  /** Shown in the stats bar with two decimals. */
  duration_seconds: number
  /** Counters shown in the stats bar. */
  summary: {
    total_cells: number
    non_empty_cells: number
    merged_words: number
    [key: string]: unknown
  }
  [key: string]: unknown
}
|
||||
|
||||
/** Props of the kombi compare step. */
interface KombiCompareStepProps {
  /** OCR session to run the engines for; the start button is disabled while null. */
  sessionId: string | null
  /** Called when the user clicks "Fertig". */
  onNext: () => void
}
|
||||
|
||||
/** Lifecycle of a single engine request. */
type EngineStatus = 'pending' | 'running' | 'done' | 'error'

/**
 * One column of the side-by-side comparison: heading, optional error tag and
 * either the reconstruction with its stats bar or a placeholder.
 */
function EngineColumn({
  title,
  statsLabel,
  sessionId,
  status,
  result,
}: {
  title: string
  statsLabel: string
  sessionId: string | null
  status: EngineStatus
  result: KombiResult | null
}) {
  return (
    <div className="space-y-2">
      <div className="flex items-center gap-2">
        <span className="text-sm font-medium text-gray-700 dark:text-gray-300">
          {title}
        </span>
        {status === 'error' && (
          <span className="text-xs text-red-500">Fehler</span>
        )}
      </div>
      {result ? (
        <>
          <OverlayReconstruction
            sessionId={sessionId}
            onNext={() => {}}
            wordResultOverride={result}
          />
          <StatsBar result={result} engine={statsLabel} />
        </>
      ) : (
        <div className="bg-gray-50 dark:bg-gray-900 rounded-lg p-12 text-center text-sm text-gray-400">
          {status === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'}
        </div>
      )}
    </div>
  )
}

/**
 * Pipeline step that runs both kombi OCR modes (Paddle + Tesseract and
 * RapidOCR + Tesseract) in parallel and shows their reconstructions side by side.
 *
 * Views:
 * - idle: explanation plus start button,
 * - running with no result yet: one status card per engine,
 * - otherwise: the comparison, which also appears as soon as the first
 *   engine has delivered a result.
 *
 * "Neu starten" stays disabled while any engine request is in flight.
 * Otherwise a late response would repopulate the state after the reset and
 * pull the view out of idle with a misleading "Fehlgeschlagen" placeholder.
 */
export function KombiCompareStep({ sessionId, onNext }: KombiCompareStepProps) {
  const [phase, setPhase] = useState<Phase>('idle')
  const [error, setError] = useState('')
  const [paddleResult, setPaddleResult] = useState<KombiResult | null>(null)
  const [rapidResult, setRapidResult] = useState<KombiResult | null>(null)
  const [paddleStatus, setPaddleStatus] = useState<EngineStatus>('pending')
  const [rapidStatus, setRapidStatus] = useState<EngineStatus>('pending')

  const anyEngineRunning = paddleStatus === 'running' || rapidStatus === 'running'

  /** Resets the per-run state and fires both engine requests concurrently. */
  const runBothEngines = async () => {
    if (!sessionId) return
    setPhase('running')
    setError('')
    setPaddleStatus('running')
    setRapidStatus('running')
    setPaddleResult(null)
    setRapidResult(null)

    // POSTs to one engine endpoint, stores result and status, and rethrows on
    // failure so the caller can surface the message.
    const fetchEngine = async (
      endpoint: string,
      setResult: (r: KombiResult) => void,
      setStatus: (s: EngineStatus) => void,
    ) => {
      try {
        const res = await fetch(
          `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`,
          { method: 'POST' },
        )
        if (!res.ok) {
          const body = await res.json().catch(() => ({}))
          throw new Error(body.detail || `HTTP ${res.status}`)
        }
        const data = await res.json()
        setResult(data)
        setStatus('done')
      } catch (e: unknown) {
        setStatus('error')
        throw e
      }
    }

    try {
      await Promise.all([
        fetchEngine('paddle-kombi', setPaddleResult, setPaddleStatus),
        fetchEngine('rapid-kombi', setRapidResult, setRapidStatus),
      ])
      setPhase('compare')
    } catch (e: unknown) {
      // At least one failed — still show compare if the other succeeded.
      // Only the first rejection reaches this handler; the per-engine status
      // flags mark which engines failed.
      setError(e instanceof Error ? e.message : String(e))
      setPhase('compare')
    }
  }

  if (phase === 'idle') {
    return (
      <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-8 text-center">
        <div className="text-4xl mb-3">⚖️</div>
        <h3 className="text-lg font-semibold text-gray-800 dark:text-gray-200 mb-2">
          Kombi-Vergleich
        </h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 mb-6 max-w-lg mx-auto">
          Beide Kombi-Modi (Paddle + Tesseract vs. RapidOCR + Tesseract) laufen parallel.
          Die Ergebnisse werden nebeneinander angezeigt, damit die Qualitaet direkt verglichen werden kann.
        </p>
        <button
          type="button"
          onClick={runBothEngines}
          disabled={!sessionId}
          className="px-5 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed font-medium"
        >
          Beide Kombi-Modi starten
        </button>
      </div>
    )
  }

  if (phase === 'running' && !paddleResult && !rapidResult) {
    return (
      <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-8">
        <div className="flex items-center justify-center gap-8">
          <EngineStatusCard label="Paddle + Tesseract" status={paddleStatus} />
          <EngineStatusCard label="RapidOCR + Tesseract" status={rapidStatus} />
        </div>
      </div>
    )
  }

  // compare phase
  return (
    <div className="space-y-4">
      {error && (
        <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-3 text-sm text-red-700 dark:text-red-300">
          {error}
        </div>
      )}

      <div className="flex items-center justify-between">
        <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
          Side-by-Side Vergleich
        </h3>
        <button
          type="button"
          onClick={() => { setPhase('idle'); setPaddleResult(null); setRapidResult(null) }}
          disabled={anyEngineRunning}
          className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
        >
          Neu starten
        </button>
      </div>

      <div className="grid grid-cols-2 gap-4">
        {/* Left: Paddle-Kombi */}
        <EngineColumn
          title="🔀 Paddle + Tesseract"
          statsLabel="Paddle-Kombi"
          sessionId={sessionId}
          status={paddleStatus}
          result={paddleResult}
        />

        {/* Right: Rapid-Kombi */}
        <EngineColumn
          title="⚡ RapidOCR + Tesseract"
          statsLabel="Rapid-Kombi"
          sessionId={sessionId}
          status={rapidStatus}
          result={rapidResult}
        />
      </div>

      <div className="flex justify-end">
        <button
          type="button"
          onClick={onNext}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
        >
          Fertig
        </button>
      </div>
    </div>
  )
}
|
||||
|
||||
/**
 * Small card showing an engine label next to an indicator for its status:
 * spinner for 'running', check mark for 'done', cross for 'error' and an open
 * circle for 'pending'. Any other status renders the label alone.
 */
function EngineStatusCard({ label, status }: { label: string; status: string }) {
  const renderIndicator = () => {
    switch (status) {
      case 'running':
        return (
          <div className="w-5 h-5 border-2 border-teal-400 border-t-transparent rounded-full animate-spin" />
        )
      case 'done':
        return <span className="text-green-500 text-lg">✓</span>
      case 'error':
        return <span className="text-red-500 text-lg">✗</span>
      case 'pending':
        return <span className="text-gray-400 text-lg">○</span>
      default:
        return null
    }
  }

  return (
    <div className="flex items-center gap-3 bg-gray-50 dark:bg-gray-900 rounded-lg px-5 py-4">
      {renderIndicator()}
      <span className="text-sm text-gray-700 dark:text-gray-300">{label}</span>
    </div>
  )
}
|
||||
|
||||
/**
 * One-line summary under a reconstruction: engine name, merged word count,
 * filled/total cells and the run time with two decimals. Missing summary
 * values and a missing duration are shown as 0.
 */
function StatsBar({ result, engine }: { result: KombiResult; engine: string }) {
  const { summary, duration_seconds: rawDuration } = result
  const mergedWords = summary?.merged_words ?? 0
  const filledCells = summary?.non_empty_cells ?? 0
  const cellCount = summary?.total_cells ?? 0
  const elapsed = (rawDuration ?? 0).toFixed(2)

  return (
    <div className="flex items-center gap-3 text-[11px] text-gray-500 dark:text-gray-400 bg-gray-50 dark:bg-gray-900 rounded-lg px-3 py-2">
      <span className="font-medium text-gray-600 dark:text-gray-300">{engine}</span>
      <span>{mergedWords} Woerter</span>
      <span>{filledCells}/{cellCount} Zellen</span>
      <span>{elapsed}s</span>
    </div>
  )
}
|
||||
@@ -1,644 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import type { GridResult, GridCell, RowResult, RowItem } from '@/app/(admin)/ai/ocr-overlay/types'
|
||||
import { usePixelWordPositions } from './usePixelWordPositions'
|
||||
import { useSlideWordPositions } from './useSlideWordPositions'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props of the overlay reconstruction view. */
interface OverlayReconstructionProps {
  /** OCR session whose data and cropped image are used. */
  sessionId: string | null
  onNext: () => void
  /** When set, use this data directly instead of fetching from the session API. */
  wordResultOverride?: {
    cells: GridCell[]
    image_width: number
    image_height: number
    [key: string]: unknown
  }
}

/** Editor-side view of one grid cell, built from a `GridCell`. */
interface EditableCell {
  cellId: string
  /** Cell text as delivered by the word result. */
  text: string
  /** Initialised with the same value as `text`; used to detect edits. */
  originalText: string
  /** Copied from the cell's `bbox_pct`. */
  bboxPct: { x: number; y: number; w: number; h: number }
  colType: string
  rowIndex: number
  colIndex: number
}

/** One text edit on the undo/redo stacks: undo restores `oldText`, redo re-applies `newText`. */
type UndoAction = { cellId: string; oldText: string; newText: string }
|
||||
|
||||
export function OverlayReconstruction({ sessionId, onNext, wordResultOverride }: OverlayReconstructionProps) {
|
||||
const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
|
||||
const [error, setError] = useState('')
|
||||
const [cells, setCells] = useState<EditableCell[]>([])
|
||||
const [gridCells, setGridCells] = useState<GridCell[]>([])
|
||||
const [editedTexts, setEditedTexts] = useState<Map<string, string>>(new Map())
|
||||
|
||||
// Undo/Redo
|
||||
const [undoStack, setUndoStack] = useState<UndoAction[]>([])
|
||||
const [redoStack, setRedoStack] = useState<UndoAction[]>([])
|
||||
|
||||
// Overlay state
|
||||
const [rows, setRows] = useState<RowItem[]>([])
|
||||
const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
|
||||
const [fontScale, setFontScale] = useState(0.7)
|
||||
const [globalBold, setGlobalBold] = useState(false)
|
||||
const [imageRotation, setImageRotation] = useState<0 | 180>(0)
|
||||
const [textOpacity, setTextOpacity] = useState(100)
|
||||
const [textColor, setTextColor] = useState<'red' | 'blue' | 'black'>('red')
|
||||
const [positioningMode, setPositioningMode] = useState<'cluster' | 'slide'>('slide')
|
||||
const reconRef = useRef<HTMLDivElement>(null)
|
||||
const [reconWidth, setReconWidth] = useState(0)
|
||||
|
||||
// Pixel-based word positions (both algorithms run, toggle selects which to use)
|
||||
const overlayImageUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
const clusterPositions = usePixelWordPositions(
|
||||
overlayImageUrl,
|
||||
gridCells,
|
||||
status === 'ready',
|
||||
imageRotation,
|
||||
)
|
||||
const slidePositions = useSlideWordPositions(
|
||||
overlayImageUrl,
|
||||
gridCells,
|
||||
status === 'ready',
|
||||
imageRotation,
|
||||
)
|
||||
const cellWordPositions = positioningMode === 'slide' ? slidePositions : clusterPositions
|
||||
|
||||
// Track container width
|
||||
useEffect(() => {
|
||||
const el = reconRef.current
|
||||
if (!el) return
|
||||
const obs = new ResizeObserver(entries => {
|
||||
for (const entry of entries) setReconWidth(entry.contentRect.width)
|
||||
})
|
||||
obs.observe(el)
|
||||
return () => obs.disconnect()
|
||||
}, [status])
|
||||
|
||||
// Load session data
|
||||
useEffect(() => {
|
||||
if (wordResultOverride) {
|
||||
applyWordResult(wordResultOverride)
|
||||
return
|
||||
}
|
||||
if (!sessionId) return
|
||||
loadSessionData()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId, wordResultOverride])
|
||||
|
||||
const applyWordResult = (wordResult: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown }) => {
|
||||
const rawGridCells: GridCell[] = wordResult.cells || []
|
||||
setGridCells(rawGridCells)
|
||||
|
||||
const editableCells: EditableCell[] = rawGridCells.map(c => ({
|
||||
cellId: c.cell_id,
|
||||
text: c.text,
|
||||
originalText: c.text,
|
||||
bboxPct: c.bbox_pct,
|
||||
colType: c.col_type,
|
||||
rowIndex: c.row_index,
|
||||
colIndex: c.col_index,
|
||||
}))
|
||||
setCells(editableCells)
|
||||
setEditedTexts(new Map())
|
||||
setUndoStack([])
|
||||
setRedoStack([])
|
||||
|
||||
if (wordResult.image_width && wordResult.image_height) {
|
||||
setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height })
|
||||
}
|
||||
|
||||
setStatus('ready')
|
||||
}
|
||||
|
||||
const loadSessionData = async () => {
|
||||
if (!sessionId) return
|
||||
setStatus('loading')
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`)
|
||||
const data = await res.json()
|
||||
|
||||
const wordResult: GridResult | undefined = data.word_result
|
||||
if (!wordResult) {
|
||||
setError('Keine Worterkennungsdaten gefunden. Bitte zuerst den Woerter-Schritt abschliessen.')
|
||||
setStatus('error')
|
||||
return
|
||||
}
|
||||
|
||||
applyWordResult(wordResult as unknown as { cells: GridCell[]; image_width: number; image_height: number })
|
||||
|
||||
// Load rows
|
||||
const rowResult: RowResult | undefined = data.row_result
|
||||
if (rowResult?.rows) setRows(rowResult.rows)
|
||||
} catch (e: unknown) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
setStatus('error')
|
||||
}
|
||||
}
|
||||
|
||||
const handleTextChange = useCallback((cellId: string, newText: string) => {
|
||||
setEditedTexts(prev => {
|
||||
const oldText = prev.get(cellId)
|
||||
const cell = cells.find(c => c.cellId === cellId)
|
||||
const prevText = oldText ?? cell?.text ?? ''
|
||||
|
||||
setUndoStack(stack => [...stack, { cellId, oldText: prevText, newText }])
|
||||
setRedoStack([])
|
||||
|
||||
const next = new Map(prev)
|
||||
next.set(cellId, newText)
|
||||
return next
|
||||
})
|
||||
}, [cells])
|
||||
|
||||
// Reverts the most recent edit: pops it from the undo stack, pushes it onto
// the redo stack and restores the cell's previous text. No-op when there is
// nothing to undo.
const undo = useCallback(() => {
  if (undoStack.length === 0) return
  const action = undoStack[undoStack.length - 1]

  // Setters are called side by side rather than nested inside an updater:
  // updater functions must be pure, and React may run them twice (Strict
  // Mode), which would otherwise push the action onto the redo stack twice.
  setUndoStack(undoStack.slice(0, -1))
  setRedoStack(rs => [...rs, action])
  setEditedTexts(prev => {
    const next = new Map(prev)
    next.set(action.cellId, action.oldText)
    return next
  })
}, [undoStack])
|
||||
|
||||
// Re-applies the most recently undone edit: pops it from the redo stack,
// pushes it back onto the undo stack and restores the edited text. No-op when
// there is nothing to redo.
const redo = useCallback(() => {
  if (redoStack.length === 0) return
  const action = redoStack[redoStack.length - 1]

  // Setters are called side by side rather than nested inside an updater:
  // updater functions must be pure, and React may run them twice (Strict
  // Mode), which would otherwise push the action onto the undo stack twice.
  setRedoStack(redoStack.slice(0, -1))
  setUndoStack(us => [...us, action])
  setEditedTexts(prev => {
    const next = new Map(prev)
    next.set(action.cellId, action.newText)
    return next
  })
}, [redoStack])
|
||||
|
||||
// Discards the pending edit of a single cell so it shows its loaded text again.
// The undo/redo stacks are left untouched.
const resetCell = useCallback((cellId: string) => {
  setEditedTexts(current => {
    const remaining = new Map(current)
    remaining.delete(cellId)
    return remaining
  })
}, [])
|
||||
|
||||
// Keyboard shortcuts: Ctrl/Cmd+Z undoes, Ctrl/Cmd+Shift+Z redoes.
// The listener is attached to the document and re-registered whenever the
// undo/redo callbacks change.
useEffect(() => {
  const handler = (e: KeyboardEvent) => {
    if (!(e.metaKey || e.ctrlKey)) return
    // `e.key` reflects the Shift/CapsLock state, so with Shift held the key is
    // reported as 'Z'. Compare case-insensitively, otherwise the redo shortcut
    // can never match.
    if (e.key.toLowerCase() !== 'z') return
    e.preventDefault()
    if (e.shiftKey) redo()
    else undo()
  }
  document.addEventListener('keydown', handler)
  return () => document.removeEventListener('keydown', handler)
}, [undo, redo])
|
||||
|
||||
// Text to show for a cell: the pending edit when one exists, otherwise the
// cell's own text.
const getDisplayText = useCallback((cell: EditableCell): string => {
  const pending = editedTexts.get(cell.cellId)
  return pending != null ? pending : cell.text
}, [editedTexts])
|
||||
|
||||
// True when the cell has a pending edit whose text differs from the cell's
// original text. An edit that restores the original text does not count.
const isEdited = useCallback((cell: EditableCell): boolean => {
  const pending = editedTexts.get(cell.cellId)
  if (pending === undefined) {
    return false
  }
  return pending !== cell.originalText
}, [editedTexts])
|
||||
|
||||
// Number of cells whose pending edit differs from the original text.
const changedCount = useMemo(
  () => cells.reduce((total, cell) => (isEdited(cell) ? total + 1 : total), 0),
  [cells, isEdited],
)
|
||||
|
||||
// Tab navigation: cell ids in reading order (by row index, then column index).
const sortedCellIds = useMemo(() => {
  const ordered = cells.slice()
  ordered.sort((left, right) => {
    const rowDelta = left.rowIndex - right.rowIndex
    return rowDelta !== 0 ? rowDelta : left.colIndex - right.colIndex
  })
  return ordered.map(cell => cell.cellId)
}, [cells])
|
||||
|
||||
// Tab / Shift+Tab handler for a cell input: moves focus to the next or
// previous cell in reading order (see sortedCellIds).
//
// e      - the key event fired on the cell's input
// cellId - id of the cell that currently has focus
const handleKeyDown = useCallback((e: React.KeyboardEvent, cellId: string) => {
  if (e.key !== 'Tab') return

  const idx = sortedCellIds.indexOf(cellId)
  if (idx === -1) return

  const step = e.shiftKey ? -1 : 1
  // Walk in the requested direction until a cell with a rendered element is
  // found. Not every cell has an element with a `cell-…` id (multi-group cells
  // are drawn as plain spans), so those are skipped instead of swallowing Tab.
  for (let i = idx + step; i >= 0 && i < sortedCellIds.length; i += step) {
    const el = document.getElementById(`cell-${sortedCellIds[i]}`)
    if (el) {
      e.preventDefault()
      el.focus()
      return
    }
  }
  // No further cell in this direction: leave the event alone so the browser's
  // default Tab behaviour can move focus out of the overlay (no keyboard trap).
}, [sortedCellIds])
|
||||
|
||||
// Sends every edit that differs from the cell's original text to the
// reconstruction endpoint. With nothing to send, the status goes straight to
// 'saved' without a request. Failures set the error message and 'error' status.
const saveReconstruction = useCallback(async () => {
  if (!sessionId) {
    return
  }
  setStatus('saving')
  try {
    // Collect only real changes: the cell must still exist and its text must
    // differ from what was loaded.
    const cellUpdates: { cell_id: string; text: string }[] = []
    for (const [cellId, text] of Array.from(editedTexts.entries())) {
      const source = cells.find(c => c.cellId === cellId)
      if (!source || text === source.originalText) continue
      cellUpdates.push({ cell_id: cellId, text })
    }

    if (cellUpdates.length > 0) {
      const endpointUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction`
      const response = await fetch(endpointUrl, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ cells: cellUpdates }),
      })

      if (!response.ok) {
        // Prefer the server-provided detail; fall back to the HTTP status
        // when the error body is missing or not JSON.
        const errorBody = await response.json().catch(() => ({}))
        throw new Error(errorBody.detail || `HTTP ${response.status}`)
      }
    }

    setStatus('saved')
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err)
    setError(message)
    setStatus('error')
  }
}, [sessionId, editedTexts, cells])
|
||||
|
||||
const dewarpedUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
|
||||
// Median rendered cell height in px, used as the base for consistent font sizing.
// Falls back to 40 when there are no cells or the rendered height is 0.
// Must stay above the early returns (Rules of Hooks).
const medianCellHeightPx = useMemo(() => {
  const naturalW = imageNaturalSize?.w || 1
  const naturalH = imageNaturalSize?.h || 1
  // Height of the rendered image at the current container width.
  const renderedH = reconWidth * (naturalH / naturalW)
  if (cells.length === 0 || renderedH === 0) {
    return 40
  }

  const sortedHeights = cells.map(c => renderedH * (c.bboxPct.h / 100))
  sortedHeights.sort((a, b) => a - b)

  const half = Math.floor(sortedHeights.length / 2)
  if (sortedHeights.length % 2 === 0) {
    return (sortedHeights[half - 1] + sortedHeights[half]) / 2
  }
  return sortedHeights[half]
}, [cells, reconWidth, imageNaturalSize])
|
||||
|
||||
if (!sessionId) {
|
||||
return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
|
||||
}
|
||||
|
||||
if (status === 'loading') {
|
||||
return (
|
||||
<div className="flex items-center gap-3 justify-center py-12">
|
||||
<div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
|
||||
<span className="text-gray-500">Overlay-Daten werden geladen...</span>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (status === 'error') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">⚠️</div>
|
||||
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
|
||||
<div className="flex gap-3">
|
||||
<button onClick={() => { setError(''); loadSessionData() }}
|
||||
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
|
||||
Erneut versuchen
|
||||
</button>
|
||||
<button onClick={onNext}
|
||||
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
|
||||
Ueberspringen →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (status === 'saved') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">✅</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Overlay gespeichert</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
|
||||
{changedCount > 0 ? `${changedCount} Zellen wurden aktualisiert.` : 'Keine Aenderungen vorgenommen.'}
|
||||
</p>
|
||||
<button onClick={onNext}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||
Fertig
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const imgW = imageNaturalSize?.w || 1
|
||||
const imgH = imageNaturalSize?.h || 1
|
||||
const containerH = reconWidth * (imgH / imgW)
|
||||
|
||||
return (
|
||||
<div className="space-y-3">
|
||||
{/* Toolbar */}
|
||||
<div className="flex items-center justify-between bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Overlay-Rekonstruktion
|
||||
</h3>
|
||||
<span className="text-xs text-gray-400">
|
||||
{cells.length} Zellen · {changedCount} geaendert
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{/* Undo/Redo */}
|
||||
<button
|
||||
onClick={undo}
|
||||
disabled={undoStack.length === 0}
|
||||
className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700 disabled:opacity-30"
|
||||
title="Rueckgaengig (Ctrl+Z)"
|
||||
>
|
||||
↩
|
||||
</button>
|
||||
<button
|
||||
onClick={redo}
|
||||
disabled={redoStack.length === 0}
|
||||
className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700 disabled:opacity-30"
|
||||
title="Wiederholen (Ctrl+Shift+Z)"
|
||||
>
|
||||
↪
|
||||
</button>
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Font scale */}
|
||||
<label className="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
|
||||
Schrift
|
||||
<input
|
||||
type="range" min={30} max={120} value={Math.round(fontScale * 100)}
|
||||
onChange={e => setFontScale(Number(e.target.value) / 100)}
|
||||
className="w-20 h-1 accent-teal-600"
|
||||
/>
|
||||
<span className="w-8 text-right font-mono">{Math.round(fontScale * 100)}%</span>
|
||||
</label>
|
||||
<button
|
||||
onClick={() => setGlobalBold(b => !b)}
|
||||
className={`px-2 py-1 text-xs rounded border transition-colors font-bold ${
|
||||
globalBold
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
>
|
||||
B
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setImageRotation(r => r === 0 ? 180 : 0)}
|
||||
className={`px-2 py-1 text-xs rounded border transition-colors ${
|
||||
imageRotation === 180
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
title="Bild 180° drehen"
|
||||
>
|
||||
180°
|
||||
</button>
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Positioning mode toggle */}
|
||||
<button
|
||||
onClick={() => setPositioningMode(m => m === 'slide' ? 'cluster' : 'slide')}
|
||||
className={`px-2 py-1 text-xs rounded border transition-colors ${
|
||||
positioningMode === 'slide'
|
||||
? 'bg-orange-500 text-white border-orange-500'
|
||||
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
title={positioningMode === 'slide'
|
||||
? 'Slide-Modus: Woerter von links nach rechts schieben (klick fuer Cluster-Modus)'
|
||||
: 'Cluster-Modus: Woerter an Pixel-Cluster zuordnen (klick fuer Slide-Modus)'}
|
||||
>
|
||||
{positioningMode === 'slide' ? 'Slide' : 'Cluster'}
|
||||
</button>
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Text color */}
|
||||
{(['red', 'blue', 'black'] as const).map(c => (
|
||||
<button
|
||||
key={c}
|
||||
onClick={() => setTextColor(c)}
|
||||
className={`w-5 h-5 rounded-full border-2 transition-colors ${
|
||||
textColor === c ? 'border-teal-500 ring-1 ring-teal-300' : 'border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
style={{ backgroundColor: c === 'black' ? '#1a1a1a' : c }}
|
||||
title={`Textfarbe: ${c}`}
|
||||
/>
|
||||
))}
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Text opacity */}
|
||||
<label className="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
|
||||
Text
|
||||
<input
|
||||
type="range" min={0} max={100} value={textOpacity}
|
||||
onChange={e => setTextOpacity(Number(e.target.value))}
|
||||
className="w-16 h-1 accent-teal-600"
|
||||
/>
|
||||
<span className="w-8 text-right font-mono">{textOpacity}%</span>
|
||||
</label>
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
<button
|
||||
onClick={saveReconstruction}
|
||||
disabled={status === 'saving'}
|
||||
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 transition-colors font-medium"
|
||||
>
|
||||
Speichern
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* True overlay: text layer on top of original image */}
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-gray-50 dark:bg-gray-900">
|
||||
<div
|
||||
ref={reconRef}
|
||||
className="relative"
|
||||
style={{ aspectRatio: `${imgW} / ${imgH}` }}
|
||||
>
|
||||
{/* Background: original image */}
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Original"
|
||||
className="absolute inset-0 w-full h-full object-contain"
|
||||
onLoad={(e) => {
|
||||
const img = e.target as HTMLImageElement
|
||||
setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight })
|
||||
}}
|
||||
/>
|
||||
|
||||
{/* Text overlay layer */}
|
||||
<div
|
||||
className="absolute inset-0"
|
||||
style={{ opacity: textOpacity / 100 }}
|
||||
>
|
||||
{/* Row lines */}
|
||||
{rows.map((row, i) => (
|
||||
<div
|
||||
key={`row-${i}`}
|
||||
className="absolute left-0 right-0 border-t border-cyan-400/40"
|
||||
style={{ top: `${(row.y / imgH) * 100}%` }}
|
||||
/>
|
||||
))}
|
||||
|
||||
{/* Pixel-positioned words / editable inputs */}
|
||||
{cells.map((cell) => {
|
||||
const displayText = getDisplayText(cell)
|
||||
const edited = isEdited(cell)
|
||||
const wordPos = cellWordPositions.get(cell.cellId)
|
||||
const bboxPct = cell.bboxPct
|
||||
const colorValue = textColor === 'black' ? '#1a1a1a' : textColor
|
||||
|
||||
// Pixel-analysed: render word-groups at detected positions
|
||||
if (wordPos && wordPos.length > 0) {
|
||||
return wordPos.map((wp, i) => {
|
||||
const autoFontPx = medianCellHeightPx * wp.fontRatio * fontScale
|
||||
const fs = Math.max(6, autoFontPx)
|
||||
|
||||
if (wordPos.length > 1) {
|
||||
return (
|
||||
<span
|
||||
key={`${cell.cellId}_wp_${i}`}
|
||||
className="absolute leading-none pointer-events-none select-none"
|
||||
style={{
|
||||
left: `${wp.xPct}%`,
|
||||
top: `${wp.yPct}%`,
|
||||
width: `${wp.wPct}%`,
|
||||
height: `${wp.hPct}%`,
|
||||
fontSize: `${fs}px`,
|
||||
fontWeight: globalBold ? 'bold' : 'normal',
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
whiteSpace: 'nowrap',
|
||||
overflow: 'visible',
|
||||
color: colorValue,
|
||||
}}
|
||||
>
|
||||
{wp.text}
|
||||
</span>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div key={`${cell.cellId}_wp_${i}`} className="absolute group" style={{
|
||||
left: `${wp.xPct}%`,
|
||||
top: `${wp.yPct}%`,
|
||||
width: `${wp.wPct}%`,
|
||||
height: `${wp.hPct}%`,
|
||||
}}>
|
||||
<input
|
||||
id={`cell-${cell.cellId}`}
|
||||
type="text"
|
||||
value={displayText}
|
||||
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
|
||||
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
|
||||
className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
|
||||
edited ? 'bg-green-50/30' : ''
|
||||
}`}
|
||||
style={{
|
||||
fontSize: `${fs}px`,
|
||||
fontWeight: globalBold ? 'bold' : 'normal',
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
lineHeight: '1',
|
||||
color: colorValue,
|
||||
}}
|
||||
title={`${cell.cellId} (${cell.colType})`}
|
||||
/>
|
||||
{edited && (
|
||||
<button
|
||||
onClick={() => resetCell(cell.cellId)}
|
||||
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||
title="Zuruecksetzen"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// Fallback: no pixel data — single input at cell bbox
|
||||
if (!cell.text) return null
|
||||
|
||||
const fontSize = Math.max(6, medianCellHeightPx * fontScale)
|
||||
return (
|
||||
<div key={cell.cellId} className="absolute group" style={{
|
||||
left: `${bboxPct.x}%`,
|
||||
top: `${bboxPct.y}%`,
|
||||
width: `${bboxPct.w}%`,
|
||||
height: `${bboxPct.h}%`,
|
||||
}}>
|
||||
<input
|
||||
id={`cell-${cell.cellId}`}
|
||||
type="text"
|
||||
value={displayText}
|
||||
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
|
||||
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
|
||||
className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
|
||||
edited ? 'bg-green-50/30' : ''
|
||||
}`}
|
||||
style={{
|
||||
fontSize: `${fontSize}px`,
|
||||
fontWeight: globalBold ? 'bold' : 'normal',
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
lineHeight: '1',
|
||||
color: colorValue,
|
||||
}}
|
||||
title={`${cell.cellId} (${cell.colType})`}
|
||||
/>
|
||||
{edited && (
|
||||
<button
|
||||
onClick={() => resetCell(cell.cellId)}
|
||||
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||
title="Zuruecksetzen"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Bottom action */}
|
||||
<div className="flex justify-end">
|
||||
<button
|
||||
onClick={() => {
|
||||
if (changedCount > 0) {
|
||||
saveReconstruction()
|
||||
} else {
|
||||
onNext()
|
||||
}
|
||||
}}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium text-sm"
|
||||
>
|
||||
{changedCount > 0 ? 'Speichern & Fertig' : 'Fertig'}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,153 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { OverlayReconstruction } from './OverlayReconstruction'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
type Phase = 'idle' | 'running' | 'overlay'
|
||||
|
||||
/** Props of the PaddleDirectStep component. */
interface PaddleDirectStepProps {
  /** Session to run OCR on; with `null` the step only shows an upload hint. */
  sessionId: string | null
  /** Callback handed on to the overlay editor shown once OCR has finished. */
  onNext: () => void
  /** Suffix of the backend endpoint that is POSTed to (default: 'paddle-direct'). */
  endpoint?: string
  /** Heading displayed while the step is idle. */
  title?: string
  /** Explanatory text displayed while the step is idle. */
  description?: string
  /** Icon displayed while the step is idle. */
  icon?: string
  /** Caption of the start button. */
  buttonLabel?: string
  /** Caption displayed while OCR is running. */
  runningLabel?: string
  /** OCR engine key compared against the session's word result for auto-detect. */
  engineKey?: string
}
|
||||
|
||||
/**
 * Pipeline step that runs an OCR pass for a session and then shows the
 * overlay editor.
 *
 * Phases:
 *  - 'idle':    title, description and a start button (plus the last error, if any)
 *  - 'running': spinner while the POST to the OCR endpoint is in flight
 *  - 'overlay': OverlayReconstruction, preceded by run statistics when available
 *
 * On mount and whenever the session or engine key changes, the session is
 * fetched; if its word result was produced by `engineKey`, the step switches
 * straight to the overlay.
 */
export function PaddleDirectStep({
  sessionId,
  onNext,
  endpoint = 'paddle-direct',
  title = 'PP-OCRv5 Direct',
  description = 'PP-OCRv5 (lokal via RapidOCR) erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.',
  icon = '⚡',
  buttonLabel = 'PP-OCRv5 starten',
  runningLabel = 'PP-OCRv5 laeuft...',
  engineKey = 'paddle_direct',
}: PaddleDirectStepProps) {
  const [phase, setPhase] = useState<Phase>('idle')
  const [error, setError] = useState<string | null>(null)
  const [stats, setStats] = useState<{ cells: number; rows: number; duration: number } | null>(null)

  // Auto-detect: if session already has matching word_result → show overlay
  useEffect(() => {
    if (!sessionId) return
    let cancelled = false
    ;(async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok || cancelled) return
        const data = await res.json()
        // Parsing the body is another await: the effect may have been cleaned
        // up in the meantime (session switched / unmounted), so check again
        // before touching state on behalf of a superseded session.
        if (cancelled) return
        if (data.word_result?.ocr_engine === engineKey) {
          setPhase('overlay')
        }
      } catch {
        // Best-effort probe: on failure the step simply stays in its current phase.
      }
    })()
    return () => { cancelled = true }
  }, [sessionId, engineKey])

  // Starts the OCR run and, on success, stores its statistics and shows the overlay.
  const runOcr = useCallback(async () => {
    if (!sessionId) return
    setPhase('running')
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`, {
        method: 'POST',
      })
      if (!res.ok) {
        // Prefer the server-provided detail; fall back to the HTTP status.
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || `HTTP ${res.status}`)
      }
      const data = await res.json()
      setStats({
        cells: data.summary?.total_cells || 0,
        rows: data.grid_shape?.rows || 0,
        duration: data.duration_seconds || 0,
      })
      setPhase('overlay')
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      setPhase('idle')
    }
  }, [sessionId, endpoint])

  if (!sessionId) {
    return (
      <div className="text-sm text-gray-400 py-8 text-center">
        Bitte zuerst ein Bild hochladen.
      </div>
    )
  }

  if (phase === 'overlay') {
    return (
      <div className="space-y-3">
        {stats && (
          <div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
            <span>{stats.cells} Woerter erkannt</span>
            <span>{stats.rows} Zeilen</span>
            <span>{stats.duration.toFixed(1)}s</span>
          </div>
        )}
        <OverlayReconstruction sessionId={sessionId} onNext={onNext} />
      </div>
    )
  }

  return (
    <div className="flex flex-col items-center justify-center py-16 space-y-6">
      {phase === 'running' ? (
        <>
          <div className="w-10 h-10 border-4 border-teal-200 dark:border-teal-800 border-t-teal-600 dark:border-t-teal-400 rounded-full animate-spin" />
          <div className="text-center space-y-1">
            <p className="text-sm font-medium text-gray-700 dark:text-gray-300">
              {runningLabel}
            </p>
            <p className="text-xs text-gray-400">
              Bild wird analysiert (ca. 5-30s)
            </p>
          </div>
        </>
      ) : (
        <>
          <div className="text-center space-y-2">
            <div className="text-4xl">{icon}</div>
            <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300">
              {title}
            </h3>
            <p className="text-sm text-gray-500 dark:text-gray-400 max-w-md">
              {description}
            </p>
          </div>

          {error && (
            <div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 px-4 py-2 rounded-lg">
              {error}
            </div>
          )}

          <button
            onClick={runOcr}
            className="px-6 py-2.5 bg-teal-600 text-white text-sm font-medium rounded-lg hover:bg-teal-700 transition-colors"
          >
            {buttonLabel}
          </button>
        </>
      )}
    </div>
  )
}
|
||||
@@ -1,253 +0,0 @@
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
|
||||
|
||||
/** Placement of one word group on the page; positions are percentages of the image size. */
export interface WordPosition {
  /** Left edge, in percent of the image width. */
  xPct: number
  /** Width, in percent of the image width. */
  wPct: number
  /** Top edge, in percent of the image height. */
  yPct: number
  /** Height, in percent of the image height. */
  hPct: number
  /** Text of the word group. */
  text: string
  /** Font size relative to the cell height (capped at 1.0 by the pixel analysis). */
  fontRatio: number
}
|
||||
|
||||
/**
 * Analyse dark-pixel clusters on an image to determine
 * the exact horizontal position & auto-font-size of word groups in each cell.
 *
 * Per cell, the text is split into groups on runs of 3+ whitespace characters,
 * a vertical projection of dark pixels (luminance < 128) is built over the
 * cell's bounding box, and contiguous dark columns are merged into clusters.
 * Groups are then matched to clusters by expected text width; if there is only
 * one group or too few clusters, the whole text is placed on the widest cluster.
 * Finally the most common font ratio (bucketed to 0.02) is applied to all words.
 *
 * When rotation=180, the image is rotated 180° before pixel analysis.
 * Cell coordinates are transformed to the rotated space for reading,
 * and cluster positions are mirrored back to the original coordinate system.
 *
 * Nothing is computed (and the previous result is kept) while `active` is
 * false, `cells` is empty or `imageUrl` is empty.
 *
 * @param imageUrl URL of the image to analyse (loaded with crossOrigin 'anonymous')
 * @param cells    grid cells providing `bbox_pct`, `text` and `cell_id`
 * @param active   whether the analysis should run
 * @param rotation 0 or 180 degrees
 * @returns a Map<cell_id, WordPosition[]>
 */
export function usePixelWordPositions(
  imageUrl: string,
  cells: GridCell[],
  active: boolean,
  rotation: 0 | 180 = 0,
): Map<string, WordPosition[]> {
  const [cellWordPositions, setCellWordPositions] = useState<Map<string, WordPosition[]>>(new Map())

  useEffect(() => {
    if (!active || cells.length === 0 || !imageUrl) return

    // Set by the cleanup below. Image loading is asynchronous, so without this
    // guard a load started for old inputs could finish after a newer one (or
    // after unmount) and overwrite the state with outdated positions.
    let cancelled = false

    const img = new Image()
    img.crossOrigin = 'anonymous'
    img.onload = () => {
      if (cancelled) return

      const imgW = img.naturalWidth
      const imgH = img.naturalHeight

      const canvas = document.createElement('canvas')
      canvas.width = imgW
      canvas.height = imgH
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      if (rotation === 180) {
        ctx.translate(imgW, imgH)
        ctx.rotate(Math.PI)
        ctx.drawImage(img, 0, 0)
        // Reset the transform so later canvas reads use plain pixel coordinates.
        ctx.setTransform(1, 0, 0, 1, 0, 0)
      } else {
        ctx.drawImage(img, 0, 0)
      }

      // Reference font used only for measuring expected text widths.
      const refFontSize = 40
      const fontFam = "'Liberation Sans', Arial, sans-serif"
      ctx.font = `${refFontSize}px ${fontFam}`

      const positions = new Map<string, WordPosition[]>()

      for (const cell of cells) {
        if (!cell.bbox_pct || !cell.text) continue

        const rawGroups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)

        // Merge single-char symbol groups (OCR artifacts from box borders like "|", ">")
        // with their neighbour to avoid polluting the cluster-to-group matching
        const groups: string[] = []
        for (let gi = 0; gi < rawGroups.length; gi++) {
          const g = rawGroups[gi]
          const isArtifact = g.length <= 2 && !/[a-zA-Z0-9\u00C0-\u024F]/.test(g)
          if (isArtifact) {
            if (gi + 1 < rawGroups.length) {
              // merge with next group
              rawGroups[gi + 1] = g + ' ' + rawGroups[gi + 1]
            } else if (groups.length > 0) {
              // last group — merge with previous
              groups[groups.length - 1] += ' ' + g
            } else {
              groups.push(g)
            }
          } else {
            groups.push(g)
          }
        }

        // Cell rectangle in canvas pixels (mirrored when the canvas is rotated).
        let cx: number, cy: number
        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)

        if (rotation === 180) {
          cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
          cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
        } else {
          cx = Math.round(cell.bbox_pct.x / 100 * imgW)
          cy = Math.round(cell.bbox_pct.y / 100 * imgH)
        }
        if (cw <= 0 || ch <= 0) continue
        if (cx < 0) cx = 0
        if (cy < 0) cy = 0
        if (cx + cw > imgW || cy + ch > imgH) continue

        const imageData = ctx.getImageData(cx, cy, cw, ch)

        // Vertical projection: number of dark pixels in each pixel column.
        const proj = new Float32Array(cw)
        for (let y = 0; y < ch; y++) {
          for (let x = 0; x < cw; x++) {
            const idx = (y * cw + x) * 4
            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
            if (lum < 128) proj[x]++
          }
        }

        // A column counts as "ink" at >= 3% dark pixels; clusters are split by
        // gaps wider than max(5px, 2% of the cell width).
        const threshold = Math.max(1, ch * 0.03)
        const minGap = Math.max(5, Math.round(cw * 0.02))
        let clusters: { start: number; end: number }[] = []
        let inCluster = false
        let clStart = 0
        let gap = 0

        for (let x = 0; x < cw; x++) {
          if (proj[x] >= threshold) {
            if (!inCluster) { clStart = x; inCluster = true }
            gap = 0
          } else if (inCluster) {
            gap++
            if (gap > minGap) {
              clusters.push({ start: clStart, end: x - gap })
              inCluster = false
              gap = 0
            }
          }
        }
        if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })

        if (clusters.length === 0) continue

        // Filter out very narrow clusters (likely box borders / vertical lines)
        const minClusterW = Math.max(3, Math.round(cw * 0.005))
        clusters = clusters.filter(c => (c.end - c.start + 1) > minClusterW)
        if (clusters.length === 0) continue

        if (rotation === 180) {
          // Mirror back into the unrotated cell and restore left-to-right order.
          clusters = clusters.map(c => ({
            start: cw - 1 - c.end,
            end: cw - 1 - c.start,
          })).reverse()
        }

        const wordPos: WordPosition[] = []

        // Match groups to clusters using width-proportional assignment.
        // Each group is assigned to the cluster whose width best matches
        // the group's expected pixel width (text measurement).
        if (groups.length > 1 && clusters.length >= groups.length) {
          // Measure each group's expected width
          const groupWidths = groups.map(g => ctx.measureText(g).width)

          // Greedy assignment: for each group (in order), find the best
          // unassigned cluster by width ratio consistency
          const totalMeasured = groupWidths.reduce((a, b) => a + b, 0)
          const totalClusterW = clusters.reduce((a, c) => a + (c.end - c.start + 1), 0)
          const refScale = totalClusterW / totalMeasured
          const used = new Set<number>()

          const assignments: number[] = []
          for (let gi = 0; gi < groups.length; gi++) {
            const expectedW = groupWidths[gi] * refScale
            let bestIdx = -1
            let bestDiff = Infinity
            for (let ci = 0; ci < clusters.length; ci++) {
              if (used.has(ci)) continue
              const clW = clusters[ci].end - clusters[ci].start + 1
              const diff = Math.abs(clW - expectedW)
              if (diff < bestDiff) {
                bestDiff = diff
                bestIdx = ci
              }
            }
            used.add(bestIdx)
            assignments.push(bestIdx)
          }

          // Emit the (group, cluster) pairs ordered by cluster position, left to right
          const sortedPairs = assignments
            .map((ci, gi) => ({ ci, gi }))
            .sort((a, b) => clusters[a.ci].start - clusters[b.ci].start)

          for (const { ci, gi } of sortedPairs) {
            const cl = clusters[ci]
            const clusterW = cl.end - cl.start + 1
            // Font size at which the measured text would span the cluster width.
            const autoFontPx = refFontSize * (clusterW / groupWidths[gi])
            const fontRatio = Math.min(autoFontPx / ch, 1.0)
            wordPos.push({
              xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
              wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
              yPct: cell.bbox_pct.y,
              hPct: cell.bbox_pct.h,
              text: groups[gi],
              fontRatio,
            })
          }
        } else {
          // Single group OR not enough clusters:
          // use the WIDEST cluster (not first-to-last span which pulls in
          // stray pixels from adjacent page areas like box borders)
          const widest = clusters.reduce((best, c) =>
            (c.end - c.start) > (best.end - best.start) ? c : best, clusters[0])
          const clusterW = widest.end - widest.start + 1
          const measured = ctx.measureText(cell.text.trim())
          const autoFontPx = refFontSize * (clusterW / measured.width)
          const fontRatio = Math.min(autoFontPx / ch, 1.0)
          wordPos.push({
            xPct: cell.bbox_pct.x + (widest.start / cw) * cell.bbox_pct.w,
            wPct: ((widest.end - widest.start + 1) / cw) * cell.bbox_pct.w,
            yPct: cell.bbox_pct.y,
            hPct: cell.bbox_pct.h,
            text: cell.text.trim(),
            fontRatio,
          })
        }

        positions.set(cell.cell_id, wordPos)
      }

      // Normalise: find the most common fontRatio (mode) and apply it to all
      const allRatios: number[] = []
      for (const wps of positions.values()) {
        for (const wp of wps) allRatios.push(wp.fontRatio)
      }
      if (allRatios.length > 0) {
        const buckets = new Map<number, number>()
        for (const r of allRatios) {
          // Bucket to steps of 0.02 so near-identical ratios count together.
          const key = Math.round(r * 50) / 50
          buckets.set(key, (buckets.get(key) || 0) + 1)
        }
        let modeRatio = allRatios[0]
        let modeCount = 0
        for (const [ratio, count] of buckets) {
          if (count > modeCount) { modeRatio = ratio; modeCount = count }
        }
        for (const wps of positions.values()) {
          for (const wp of wps) wp.fontRatio = modeRatio
        }
      }

      setCellWordPositions(positions)
    }
    img.src = imageUrl

    return () => {
      // Invalidate this run: a late `onload` must not publish stale positions.
      cancelled = true
      img.onload = null
    }
  }, [active, cells, imageUrl, rotation])

  return cellWordPositions
}
|
||||
@@ -1,231 +0,0 @@
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
|
||||
|
||||
/** Placement of one word on the page; positions are percentages of the image size. */
export interface WordPosition {
  /** Left edge, in percent of the image width. */
  xPct: number
  /** Width, in percent of the image width. */
  wPct: number
  /** Top edge, in percent of the image height. */
  yPct: number
  /** Height, in percent of the image height. */
  hPct: number
  /** Text of the word. */
  text: string
  /** Font-size ratio consumed by the renderer. */
  fontRatio: number
}
|
||||
|
||||
/**
 * Computes, per cell, where each word should be drawn on top of the page image.
 *
 * The image is loaded first (its natural size is needed to convert pixels to
 * percentages); then one of two strategies is applied to ALL cells:
 *
 * 1. Word-box path — taken when at least one cell has `word_boxes`.
 *    Each non-blank word box, sorted left-to-right, becomes one position using
 *    the box's own pixel coordinates and the box's own `text`. A cell with no
 *    usable boxes gets the whitespace-separated tokens of `cell.text` spread
 *    evenly across the cell's width.
 * 2. Pixel-projection fallback — taken when no cell has word boxes.
 *    The cell is cropped from a canvas copy of the image, a per-column
 *    "dark pixel" profile is built, and each token of `cell.text` is slid from
 *    left to right until enough ink lies under its measured width.
 *
 * Cells without `bbox_pct` or without `text` are skipped in both paths.
 *
 * @param imageUrl  URL of the page image; loaded with `crossOrigin = 'anonymous'`.
 * @param cells     Grid cells to position words for.
 * @param active    When false nothing is computed and the previous result is kept.
 * @param rotation  0 or 180; only consulted by the pixel-projection fallback.
 * @returns Map from `cell_id` to the word positions of that cell. Empty until
 *          the first computation has finished.
 */
export function useSlideWordPositions(
  imageUrl: string,
  cells: GridCell[],
  active: boolean,
  rotation: 0 | 180 = 0,
): Map<string, WordPosition[]> {
  const [result, setResult] = useState<Map<string, WordPosition[]>>(new Map())

  useEffect(() => {
    if (!active || cells.length === 0 || !imageUrl) return

    // Flipped by the cleanup below. Without it, an image that finishes loading
    // after the inputs changed (or after unmount) would still call setResult
    // and could overwrite the result of a newer run with stale positions.
    let cancelled = false

    const img = new Image()
    img.crossOrigin = 'anonymous'
    img.onload = () => {
      if (cancelled) return

      const imgW = img.naturalWidth
      const imgH = img.naturalHeight

      const hasWordBoxes = cells.some(c => c.word_boxes && c.word_boxes.length > 0)

      if (hasWordBoxes) {
        // --- WORD-BOX PATH: use OCR positions directly ---
        // Each word_box already carries its own coordinates and text,
        // so they are used as-is — no matching against cell.text.
        const positions = new Map<string, WordPosition[]>()

        for (const cell of cells) {
          if (!cell.bbox_pct || !cell.text) continue

          const boxes = (cell.word_boxes || [])
            .filter(wb => wb.text.trim())
            .sort((a, b) => a.left - b.left)

          if (boxes.length === 0) {
            // No usable word_boxes for this cell — spread tokens evenly across it
            const tokens = cell.text.split(/\s+/).filter(Boolean)
            if (tokens.length === 0) continue
            const fallbackW = cell.bbox_pct.w / tokens.length
            const wordPos = tokens.map((t, i) => ({
              xPct: cell.bbox_pct.x + i * fallbackW,
              wPct: fallbackW,
              yPct: cell.bbox_pct.y,
              hPct: cell.bbox_pct.h,
              text: t,
              fontRatio: 1.0,
            }))
            positions.set(cell.cell_id, wordPos)
            continue
          }

          // Convert each box from image pixels to percentages
          const wordPos: WordPosition[] = boxes.map(box => ({
            xPct: (box.left / imgW) * 100,
            wPct: (box.width / imgW) * 100,
            yPct: (box.top / imgH) * 100,
            hPct: (box.height / imgH) * 100,
            text: box.text,
            fontRatio: 1.0,
          }))

          if (wordPos.length > 0) {
            positions.set(cell.cell_id, wordPos)
          }
        }

        setResult(positions)
        return
      }

      // --- FALLBACK: pixel-projection slide (no word_boxes) ---
      const canvas = document.createElement('canvas')
      canvas.width = imgW
      canvas.height = imgH
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      if (rotation === 180) {
        // Draw the image turned by 180 degrees, then restore the identity transform
        ctx.translate(imgW, imgH)
        ctx.rotate(Math.PI)
        ctx.drawImage(img, 0, 0)
        ctx.setTransform(1, 0, 0, 1, 0, 0)
      } else {
        ctx.drawImage(img, 0, 0)
      }

      // Text is measured at a fixed reference size and scaled afterwards
      const refFontSize = 40
      const fontFam = "'Liberation Sans', Arial, sans-serif"
      ctx.font = `${refFontSize}px ${fontFam}`

      // Median cell height in pixels (30 if no cell has a positive height)
      const cellHeights = cells
        .filter(c => c.bbox_pct && c.bbox_pct.h > 0)
        .map(c => Math.round(c.bbox_pct.h / 100 * imgH))
        .sort((a, b) => a - b)
      const medianCh = cellHeights.length > 0
        ? cellHeights[Math.floor(cellHeights.length / 2)]
        : 30

      // Assumed rendered font size: 70% of the median cell height
      const renderedFontImgPx = medianCh * 0.7
      const measureScale = renderedFontImgPx / refFontSize
      const spaceWidthPx = Math.max(2, Math.round(ctx.measureText(' ').width * measureScale))

      const positions = new Map<string, WordPosition[]>()

      for (const cell of cells) {
        if (!cell.bbox_pct || !cell.text) continue

        let cx: number, cy: number
        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)

        if (rotation === 180) {
          // Mirror the cell origin to match the rotated canvas
          cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
          cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
        } else {
          cx = Math.round(cell.bbox_pct.x / 100 * imgW)
          cy = Math.round(cell.bbox_pct.y / 100 * imgH)
        }
        if (cw <= 0 || ch <= 0) continue
        if (cx < 0) cx = 0
        if (cy < 0) cy = 0
        // Cells reaching past the image are skipped
        if (cx + cw > imgW || cy + ch > imgH) continue

        // Vertical projection: per column, count pixels with luminance < 128
        const imageData = ctx.getImageData(cx, cy, cw, ch)
        const proj = new Float32Array(cw)
        for (let y = 0; y < ch; y++) {
          for (let x = 0; x < cw; x++) {
            const idx = (y * cw + x) * 4
            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
            if (lum < 128) proj[x]++
          }
        }

        // A column counts as "ink" when at least 3% of its pixels (min. 1) are dark
        const threshold = Math.max(1, ch * 0.03)
        const ink = new Uint8Array(cw)
        for (let x = 0; x < cw; x++) {
          ink[x] = proj[x] >= threshold ? 1 : 0
        }
        if (rotation === 180) {
          ink.reverse()
        }

        const tokens = cell.text.split(/\s+/).filter(Boolean)
        if (tokens.length === 0) continue

        const tokenWidthsPx = tokens.map(t =>
          Math.max(4, Math.round(ctx.measureText(t).width * measureScale))
        )

        const wordPos: WordPosition[] = []
        let cursor = 0

        for (let ti = 0; ti < tokens.length; ti++) {
          const tokenW = tokenWidthsPx[ti]
          // The token is placed once 15% of its width (min. 1 column) covers ink
          const coverageNeeded = Math.max(1, Math.round(tokenW * 0.15))
          let bestX = cursor

          const searchLimit = Math.max(cursor, cw - tokenW)

          for (let x = cursor; x <= searchLimit; x++) {
            let inkCount = 0
            const spanEnd = Math.min(x + tokenW, cw)
            for (let dx = 0; dx < spanEnd - x; dx++) {
              inkCount += ink[x + dx]
            }
            if (inkCount >= coverageNeeded) {
              bestX = x
              break
            }
            // Every token but the first gives up after scanning 30% of the
            // cell width and stays at the cursor
            if (x > cursor + cw * 0.3 && ti > 0) {
              bestX = cursor
              break
            }
          }

          // Keep the token inside the cell
          if (bestX + tokenW > cw) {
            bestX = Math.max(0, cw - tokenW)
          }

          wordPos.push({
            xPct: cell.bbox_pct.x + (bestX / cw) * cell.bbox_pct.w,
            wPct: (tokenW / cw) * cell.bbox_pct.w,
            yPct: cell.bbox_pct.y,
            hPct: cell.bbox_pct.h,
            text: tokens[ti],
            fontRatio: 1.0,
          })

          cursor = bestX + tokenW + spaceWidthPx
        }

        if (wordPos.length > 0) {
          positions.set(cell.cell_id, wordPos)
        }
      }

      setResult(positions)
    }
    img.src = imageUrl

    return () => {
      cancelled = true
      img.onload = null
    }
  }, [active, cells, imageUrl, rotation])

  return result
}
|
||||
@@ -1,68 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import type { SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props of {@link BoxSessionTabs}. */
interface BoxSessionTabsProps {
  /** Id of the main session; selected by the "Hauptseite" tab. */
  parentSessionId: string
  /** Sub-sessions; one tab is rendered per entry. */
  subSessions: SubSession[]
  /** Id of the currently selected session (parent or sub-session). */
  activeSessionId: string
  /** Called with the session id of the tab that was clicked. */
  onSessionChange: (sessionId: string) => void
}
|
||||
|
||||
/** Glyph shown in front of a sub-session tab label, keyed by progress state. */
const STATUS_ICONS: Record<string, string> = {
  // hourglass
  pending: '\u23F3',
  // arrows (U+1F504 written as a surrogate pair)
  processing: '\uD83D\uDD04',
  // checkmark
  completed: '\u2713',
}
|
||||
|
||||
/**
 * Picks the status glyph for a sub-session.
 *
 * - completed: `status` is 'completed' or `current_step` is 9 or higher
 * - processing: `current_step` is greater than 1
 * - pending: everything else (including a missing `current_step`)
 */
function getStatusIcon(sub: SubSession): string {
  const step = sub.current_step
  const isDone = sub.status === 'completed' || (step && step >= 9)
  if (isDone) return STATUS_ICONS.completed
  return step && step > 1 ? STATUS_ICONS.processing : STATUS_ICONS.pending
}
|
||||
|
||||
/** Tailwind classes of one tab button; the active tab gets the highlighted variant. */
function tabClassName(isActive: boolean): string {
  const stateClasses = isActive
    ? 'bg-white dark:bg-gray-700 text-teal-700 dark:text-teal-400 shadow-sm ring-1 ring-teal-300 dark:ring-teal-600'
    : 'text-gray-500 dark:text-gray-400 hover:bg-white/50 dark:hover:bg-gray-700/50'
  return `px-3 py-1.5 rounded-lg text-xs font-medium transition-colors ${stateClasses}`
}

/**
 * Tab bar for switching between the main session ("Hauptseite") and its
 * box sub-sessions ("Box 1", "Box 2", ...).
 *
 * Renders nothing when there are no sub-sessions. Clicking a tab calls
 * `onSessionChange` with the id of the session that tab stands for.
 */
export function BoxSessionTabs({ parentSessionId, subSessions, activeSessionId, onSessionChange }: BoxSessionTabsProps) {
  if (subSessions.length === 0) return null

  const isParentActive = activeSessionId === parentSessionId

  return (
    <div className="flex items-center gap-1.5 px-1 py-1.5 bg-gray-50 dark:bg-gray-800/50 rounded-xl border border-gray-200 dark:border-gray-700">
      {/* Main session tab */}
      {/* type="button": a <button> defaults to type="submit" and would submit an enclosing form */}
      <button
        type="button"
        onClick={() => onSessionChange(parentSessionId)}
        className={tabClassName(isParentActive)}
      >
        Hauptseite
      </button>

      <div className="w-px h-5 bg-gray-200 dark:bg-gray-700" />

      {/* Sub-session tabs */}
      {subSessions.map((sub) => {
        const isActive = activeSessionId === sub.id
        const icon = getStatusIcon(sub)

        return (
          <button
            type="button"
            key={sub.id}
            onClick={() => onSessionChange(sub.id)}
            className={tabClassName(isActive)}
            title={sub.name}
          >
            <span className="mr-1">{icon}</span>
            Box {sub.box_index + 1}
          </button>
        )
      })}
    </div>
  )
}
|
||||
@@ -1,320 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useMemo } from 'react'
|
||||
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props of {@link ColumnControls}. */
interface ColumnControlsProps {
  /** Detection result to display; the component renders nothing while this is null. */
  columnResult: ColumnResult | null
  /** Click handler of the "Erneut erkennen" button. */
  onRerun: () => void
  /** Click handler of the "Manuell markieren" button. */
  onManualMode: () => void
  /** Click handler of the ground-truth edit/enter button. */
  onGtMode: () => void
  /** Called with `{ is_correct }` when the user answers "Ja" or "Nein". */
  onGroundTruth: (gt: ColumnGroundTruth) => void
  /** Click handler of the "Weiter" button. */
  onNext: () => void
  /** Disables the "Erneut erkennen" button while true. */
  isDetecting: boolean
  /** Saved ground-truth columns; when present, the Auto-vs-GT diff table is computed. */
  savedGtColumns: PageRegion[] | null
}
|
||||
|
||||
/** Tailwind badge classes per region type. */
const TYPE_COLORS: Record<string, string> = {
  column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
  column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
  column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
  column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
  page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
  column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
  column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
  header: 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400',
  footer: 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400',
}

/** Display label per region type (UI language is German). */
const TYPE_LABELS: Record<string, string> = {
  column_en: 'EN',
  column_de: 'DE',
  column_example: 'Beispiel',
  column_text: 'Text',
  page_ref: 'Seite',
  column_marker: 'Marker',
  column_ignore: 'Ignorieren',
  header: 'Header',
  footer: 'Footer',
}

/** Display label per classification method. */
const METHOD_LABELS: Record<string, string> = {
  content: 'Inhalt',
  position_enhanced: 'Position',
  position_fallback: 'Fallback',
}
|
||||
|
||||
/** One row of the Auto-vs-Ground-Truth comparison table. */
interface DiffRow {
  /** 1-based row number. */
  index: number
  /** Auto-detected column, or null when only a ground-truth column exists. */
  autoCol: PageRegion | null
  /** Ground-truth column, or null when only an auto-detected column exists. */
  gtCol: PageRegion | null
  /** `gtCol.x - autoCol.x`; null for unmatched rows. */
  diffX: number | null
  /** `gtCol.width - autoCol.width`; null for unmatched rows. */
  diffW: number | null
  /** True when both columns exist and their `type` differs. */
  typeMismatch: boolean
}
|
||||
|
||||
/**
 * Pairs auto-detected columns with ground-truth columns by their overlap on
 * the X-axis and reports the differences.
 *
 * Matching is greedy in `autoCols` order: each auto column takes the not yet
 * used GT column with the highest 1-D IoU (overlap / union of the two
 * [x, x + width] intervals), provided that IoU is strictly greater than
 * `minIoU`.
 *
 * Row order of the result: matched pairs, then unmatched auto columns, then
 * unmatched GT columns. `index` numbers the rows from 1.
 *
 * @param autoCols Auto-detected columns.
 * @param gtCols   Ground-truth columns.
 * @param minIoU   Minimum IoU (exclusive) for two columns to count as a match.
 *                 Defaults to 0.3.
 * @returns One row per matched pair and one per unmatched column.
 */
function computeDiff(autoCols: PageRegion[], gtCols: PageRegion[], minIoU = 0.3): DiffRow[] {
  const rows: DiffRow[] = []
  const usedGt = new Set<number>()
  const usedAuto = new Set<number>()

  // Match auto → GT by best X-axis overlap
  for (let ai = 0; ai < autoCols.length; ai++) {
    const a = autoCols[ai]
    let bestIdx = -1
    let bestIoU = 0

    for (let gi = 0; gi < gtCols.length; gi++) {
      if (usedGt.has(gi)) continue
      const g = gtCols[gi]
      const overlapStart = Math.max(a.x, g.x)
      const overlapEnd = Math.min(a.x + a.width, g.x + g.width)
      const overlap = Math.max(0, overlapEnd - overlapStart)
      const union = (a.width + g.width) - overlap
      const iou = union > 0 ? overlap / union : 0
      if (iou > bestIoU) {
        bestIoU = iou
        bestIdx = gi
      }
    }

    if (bestIdx >= 0 && bestIoU > minIoU) {
      usedGt.add(bestIdx)
      usedAuto.add(ai)
      const g = gtCols[bestIdx]
      rows.push({
        index: rows.length + 1,
        autoCol: a,
        gtCol: g,
        diffX: g.x - a.x,
        diffW: g.width - a.width,
        typeMismatch: a.type !== g.type,
      })
    }
  }

  // Unmatched auto columns
  for (let ai = 0; ai < autoCols.length; ai++) {
    if (usedAuto.has(ai)) continue
    rows.push({
      index: rows.length + 1,
      autoCol: autoCols[ai],
      gtCol: null,
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  }

  // Unmatched GT columns
  for (let gi = 0; gi < gtCols.length; gi++) {
    if (usedGt.has(gi)) continue
    rows.push({
      index: rows.length + 1,
      autoCol: null,
      gtCol: gtCols[gi],
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  }

  return rows
}
|
||||
|
||||
/** True for regions listed as columns: every `column*` type plus `page_ref`. */
function isColumnRegion(region: PageRegion): boolean {
  return region.type.startsWith('column') || region.type === 'page_ref'
}

/**
 * Control panel for the column-detection step.
 *
 * Shows a summary with re-run / manual / ground-truth actions, the list of
 * detected regions, an optional Auto-vs-Ground-Truth diff table (only when
 * `savedGtColumns` is set) and the "Spalten korrekt?" feedback plus "Weiter".
 * Renders nothing while `columnResult` is null.
 *
 * All buttons carry `type="button"` so they never submit an enclosing form
 * (a `<button>` without a type defaults to `submit`).
 */
export function ColumnControls({ columnResult, onRerun, onManualMode, onGtMode, onGroundTruth, onNext, isDetecting, savedGtColumns }: ColumnControlsProps) {
  // Set once the user answered "Ja"/"Nein"; replaces the buttons with "Gespeichert"
  const [gtSaved, setGtSaved] = useState(false)

  // Diff rows only exist when both an auto result and saved ground truth are available
  const diffRows = useMemo(() => {
    if (!columnResult || !savedGtColumns) return null
    const autoCols = columnResult.columns.filter(isColumnRegion)
    const gtCols = savedGtColumns.filter(isColumnRegion)
    return computeDiff(autoCols, gtCols)
  }, [columnResult, savedGtColumns])

  if (!columnResult) return null

  const columns = columnResult.columns.filter((c: PageRegion) => isColumnRegion(c))
  const headerFooter = columnResult.columns.filter((c: PageRegion) => !isColumnRegion(c))

  const handleGt = (isCorrect: boolean) => {
    onGroundTruth({ is_correct: isCorrect })
    setGtSaved(true)
  }

  return (
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-4">
      {/* Summary */}
      <div className="flex items-center gap-3 flex-wrap">
        <div className="text-sm text-gray-600 dark:text-gray-400">
          <span className="font-medium text-gray-800 dark:text-gray-200">{columns.length} Spalten</span> erkannt
          {columnResult.duration_seconds > 0 && (
            <span className="ml-2 text-xs">({columnResult.duration_seconds}s)</span>
          )}
        </div>
        <button
          type="button"
          onClick={onRerun}
          disabled={isDetecting}
          className="text-xs px-2 py-1 bg-gray-100 dark:bg-gray-700 rounded hover:bg-gray-200 dark:hover:bg-gray-600 transition-colors disabled:opacity-50"
        >
          Erneut erkennen
        </button>
        <button
          type="button"
          onClick={onManualMode}
          className="text-xs px-2 py-1 bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400 rounded hover:bg-teal-200 dark:hover:bg-teal-900/50 transition-colors"
        >
          Manuell markieren
        </button>
        <button
          type="button"
          onClick={onGtMode}
          className="text-xs px-2 py-1 bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400 rounded hover:bg-amber-200 dark:hover:bg-amber-900/50 transition-colors"
        >
          {savedGtColumns ? 'Ground Truth bearbeiten' : 'Ground Truth eintragen'}
        </button>
      </div>

      {/* Column list */}
      <div className="space-y-2">
        {columns.map((col: PageRegion, i: number) => (
          <div key={i} className="flex items-center gap-3 text-sm">
            <span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[col.type] || ''}`}>
              {TYPE_LABELS[col.type] || col.type}
            </span>
            {/* Confidence is only shown when it is below 100% */}
            {col.classification_confidence != null && col.classification_confidence < 1.0 && (
              <span className="text-xs font-medium text-gray-600 dark:text-gray-300">
                {Math.round(col.classification_confidence * 100)}%
              </span>
            )}
            {col.classification_method && (
              <span className="text-xs text-gray-400 dark:text-gray-500">
                ({METHOD_LABELS[col.classification_method] || col.classification_method})
              </span>
            )}
            <span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
              x={col.x} y={col.y} {col.width}x{col.height}px
            </span>
          </div>
        ))}
        {headerFooter.map((r: PageRegion, i: number) => (
          <div key={`hf-${i}`} className="flex items-center gap-3 text-sm">
            <span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[r.type] || ''}`}>
              {TYPE_LABELS[r.type] || r.type}
            </span>
            <span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
              x={r.x} y={r.y} {r.width}x{r.height}px
            </span>
          </div>
        ))}
      </div>

      {/* Diff table (Auto vs GT) */}
      {diffRows && diffRows.length > 0 && (
        <div className="border-t border-gray-100 dark:border-gray-700 pt-3">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
            Vergleich: Auto vs Ground Truth
          </div>
          <div className="overflow-x-auto">
            <table className="w-full text-xs">
              <thead>
                <tr className="text-gray-500 dark:text-gray-400 border-b border-gray-100 dark:border-gray-700">
                  <th className="text-left py-1 pr-2">#</th>
                  <th className="text-left py-1 pr-2">Auto (Typ, x, w)</th>
                  <th className="text-left py-1 pr-2">GT (Typ, x, w)</th>
                  <th className="text-right py-1 pr-2">Diff X</th>
                  <th className="text-right py-1">Diff W</th>
                </tr>
              </thead>
              <tbody>
                {diffRows.map((row) => (
                  <tr
                    key={row.index}
                    className={
                      // Red: a side is missing or the types differ; amber: offset/width deviates by more than 20
                      !row.autoCol || !row.gtCol || row.typeMismatch
                        ? 'bg-red-50 dark:bg-red-900/10'
                        : (row.diffX !== null && Math.abs(row.diffX) > 20) || (row.diffW !== null && Math.abs(row.diffW) > 20)
                          ? 'bg-amber-50 dark:bg-amber-900/10'
                          : ''
                    }
                  >
                    <td className="py-1 pr-2 font-mono text-gray-400">{row.index}</td>
                    <td className="py-1 pr-2 font-mono">
                      {row.autoCol ? (
                        <span>
                          <span className={`inline-block px-1 rounded ${TYPE_COLORS[row.autoCol.type] || ''}`}>
                            {TYPE_LABELS[row.autoCol.type] || row.autoCol.type}
                          </span>
                          {' '}{row.autoCol.x}, {row.autoCol.width}
                        </span>
                      ) : (
                        <span className="text-red-400">fehlt</span>
                      )}
                    </td>
                    <td className="py-1 pr-2 font-mono">
                      {row.gtCol ? (
                        <span>
                          <span className={`inline-block px-1 rounded ${TYPE_COLORS[row.gtCol.type] || ''}`}>
                            {TYPE_LABELS[row.gtCol.type] || row.gtCol.type}
                          </span>
                          {' '}{row.gtCol.x}, {row.gtCol.width}
                        </span>
                      ) : (
                        <span className="text-red-400">fehlt</span>
                      )}
                    </td>
                    <td className="py-1 pr-2 text-right font-mono">
                      {row.diffX !== null ? (
                        <span className={Math.abs(row.diffX) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
                          {row.diffX > 0 ? '+' : ''}{row.diffX}
                        </span>
                      ) : '—'}
                    </td>
                    <td className="py-1 text-right font-mono">
                      {row.diffW !== null ? (
                        <span className={Math.abs(row.diffW) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
                          {row.diffW > 0 ? '+' : ''}{row.diffW}
                        </span>
                      ) : '—'}
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </div>
      )}

      {/* Ground Truth + Navigation */}
      <div className="flex items-center justify-between pt-2 border-t border-gray-100 dark:border-gray-700">
        <div className="flex items-center gap-2">
          <span className="text-sm text-gray-500 dark:text-gray-400">Spalten korrekt?</span>
          {gtSaved ? (
            <span className="text-xs text-green-600 dark:text-green-400">Gespeichert</span>
          ) : (
            <>
              <button
                type="button"
                onClick={() => handleGt(true)}
                className="text-xs px-3 py-1 bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 rounded hover:bg-green-200 dark:hover:bg-green-900/50 transition-colors"
              >
                Ja
              </button>
              <button
                type="button"
                onClick={() => handleGt(false)}
                className="text-xs px-3 py-1 bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400 rounded hover:bg-red-200 dark:hover:bg-red-900/50 transition-colors"
              >
                Nein
              </button>
            </>
          )}
        </div>

        <button
          type="button"
          onClick={onNext}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
        >
          Weiter
        </button>
      </div>
    </div>
  )
}
|
||||
@@ -1,209 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import type { DeskewResult, DeskewGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props of {@link DeskewControls}. */
interface DeskewControlsProps {
  /** Deskew result to display; every panel is hidden while this is null. */
  deskewResult: DeskewResult | null
  /** Whether the "Binarisiert anzeigen" toggle is rendered as active. */
  showBinarized: boolean
  /** Click handler of the "Binarisiert anzeigen" toggle. */
  onToggleBinarized: () => void
  /** Whether the "Raster anzeigen" toggle is rendered as active. */
  showGrid: boolean
  /** Click handler of the "Raster anzeigen" toggle. */
  onToggleGrid: () => void
  /** Called with the slider angle when "Anwenden" is clicked. */
  onManualDeskew: (angle: number) => void
  /** Called with the user's ground-truth feedback. */
  onGroundTruth: (gt: DeskewGroundTruth) => void
  /** Click handler of the "Uebernehmen & Weiter" button. */
  onNext: () => void
  /** Disables the "Anwenden" button and shows '...' while true. */
  isApplying: boolean
}
|
||||
|
||||
/** Display label per deskew method (UI language is German). */
const METHOD_LABELS: Record<string, string> = {
  hough: 'Hough-Linien',
  word_alignment: 'Wortausrichtung',
  manual: 'Manuell',
}
|
||||
|
||||
/**
 * Control panel for the deskew (rotation) step.
 *
 * While `deskewResult` is set it shows: the detected angle/method/confidence
 * with the display toggles, a manual angle slider (-5° to +5°, step 0.1),
 * the "Rotation korrekt?" feedback form and the "next" button. With a null
 * result only the empty wrapper is rendered.
 *
 * Feedback flow: "Ja" reports `{ is_correct: true }` immediately; "Nein" first
 * reveals a notes field and reports only when "Feedback speichern" is clicked.
 *
 * All buttons carry `type="button"` so they never submit an enclosing form
 * (a `<button>` without a type defaults to `submit`).
 */
export function DeskewControls({
  deskewResult,
  showBinarized,
  onToggleBinarized,
  showGrid,
  onToggleGrid,
  onManualDeskew,
  onGroundTruth,
  onNext,
  isApplying,
}: DeskewControlsProps) {
  const [manualAngle, setManualAngle] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)

  // "Ja" is reported right away; "Nein" only opens the notes form
  const handleGroundTruth = (isCorrect: boolean) => {
    setGtFeedback(isCorrect ? 'correct' : 'incorrect')
    if (isCorrect) {
      onGroundTruth({ is_correct: true })
      setGtSaved(true)
    }
  }

  // Reports negative feedback; a slider angle of 0 and empty notes are sent as undefined
  const handleGroundTruthIncorrect = () => {
    onGroundTruth({
      is_correct: false,
      corrected_angle: manualAngle !== 0 ? manualAngle : undefined,
      notes: gtNotes || undefined,
    })
    setGtSaved(true)
  }

  return (
    <div className="space-y-4">
      {/* Results */}
      {deskewResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex flex-wrap items-center gap-3 text-sm">
            <div>
              <span className="text-gray-500">Winkel:</span>{' '}
              <span className="font-mono font-medium">{deskewResult.angle_applied}°</span>
            </div>
            <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
            <div>
              <span className="text-gray-500">Methode:</span>{' '}
              <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
                {METHOD_LABELS[deskewResult.method_used] || deskewResult.method_used}
              </span>
            </div>
            <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
            <div>
              <span className="text-gray-500">Konfidenz:</span>{' '}
              <span className="font-mono">{Math.round(deskewResult.confidence * 100)}%</span>
            </div>
            <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
            <div className="text-gray-400 text-xs">
              Hough: {deskewResult.angle_hough}° | WA: {deskewResult.angle_word_alignment}°
            </div>
          </div>

          {/* Toggles */}
          <div className="flex gap-3 mt-3">
            <button
              type="button"
              onClick={onToggleBinarized}
              className={`text-xs px-3 py-1 rounded-full border transition-colors ${
                showBinarized
                  ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
                  : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
              }`}
            >
              Binarisiert anzeigen
            </button>
            <button
              type="button"
              onClick={onToggleGrid}
              className={`text-xs px-3 py-1 rounded-full border transition-colors ${
                showGrid
                  ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
                  : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
              }`}
            >
              Raster anzeigen
            </button>
          </div>
        </div>
      )}

      {/* Manual angle */}
      {deskewResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Manuelle Korrektur</div>
          <div className="flex items-center gap-3">
            <span className="text-xs text-gray-400 w-8 text-right">-5°</span>
            <input
              type="range"
              min={-5}
              max={5}
              step={0.1}
              value={manualAngle}
              onChange={(e) => setManualAngle(parseFloat(e.target.value))}
              className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
            />
            <span className="text-xs text-gray-400 w-8">+5°</span>
            <span className="font-mono text-sm w-14 text-right">{manualAngle.toFixed(1)}°</span>
            <button
              type="button"
              onClick={() => onManualDeskew(manualAngle)}
              disabled={isApplying}
              className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
            >
              {isApplying ? '...' : 'Anwenden'}
            </button>
          </div>
        </div>
      )}

      {/* Ground Truth */}
      {deskewResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
            Rotation korrekt?
          </div>
          <p className="text-xs text-gray-400 mb-2">Nur die Drehung bewerten — Woelbung/Verzerrung wird im naechsten Schritt korrigiert.</p>
          {!gtSaved ? (
            <div className="space-y-3">
              <div className="flex gap-2">
                <button
                  type="button"
                  onClick={() => handleGroundTruth(true)}
                  className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                    gtFeedback === 'correct'
                      ? 'bg-green-100 text-green-700 ring-2 ring-green-400'
                      : 'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300'
                  }`}
                >
                  Ja
                </button>
                <button
                  type="button"
                  onClick={() => handleGroundTruth(false)}
                  className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                    gtFeedback === 'incorrect'
                      ? 'bg-red-100 text-red-700 ring-2 ring-red-400'
                      : 'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300'
                  }`}
                >
                  Nein
                </button>
              </div>
              {gtFeedback === 'incorrect' && (
                <div className="space-y-2">
                  <textarea
                    value={gtNotes}
                    onChange={(e) => setGtNotes(e.target.value)}
                    placeholder="Notizen zur Korrektur..."
                    className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
                    rows={2}
                  />
                  <button
                    type="button"
                    onClick={handleGroundTruthIncorrect}
                    className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
                  >
                    Feedback speichern
                  </button>
                </div>
              )}
            </div>
          ) : (
            <div className="text-sm text-green-600 dark:text-green-400">
              Feedback gespeichert
            </div>
          )}
        </div>
      )}

      {/* Next button */}
      {deskewResult && (
        <div className="flex justify-end">
          <button
            type="button"
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Uebernehmen & Weiter →
          </button>
        </div>
      )}
    </div>
  )
}
|
||||
@@ -1,553 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { DeskewResult, DewarpResult, DewarpDetection, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props of {@link DewarpControls}. */
interface DewarpControlsProps {
  /** Dewarp result; its `shear_degrees` and `detections` seed the shear sliders. */
  dewarpResult: DewarpResult | null
  /** Optional deskew result; its per-pass angles seed the fine-tuning rotation sliders. */
  deskewResult?: DeskewResult | null
  /** Current state of the grid overlay toggle. */
  showGrid: boolean
  /** Handler for the grid overlay toggle. */
  onToggleGrid: () => void
  /** Receives a manually chosen shear angle in degrees. */
  onManualDewarp: (shearDegrees: number) => void
  /** Optional; receives a rotation and a shear angle (both in degrees) together. */
  onCombinedAdjust?: (rotationDegrees: number, shearDegrees: number) => void
  /** Receives the user's ground-truth feedback. */
  onGroundTruth: (gt: DewarpGroundTruth) => void
  /** Handler for advancing to the next step. */
  onNext: () => void
  /** True while an adjustment is being applied. */
  isApplying: boolean
}
|
||||
|
||||
/** Display label per dewarp method (UI language is German). */
const METHOD_LABELS: Record<string, string> = {
  vertical_edge: 'A: Vertikale Kanten',
  projection: 'B: Projektions-Varianz',
  hough_lines: 'C: Hough-Linien',
  text_lines: 'D: Textzeilenanalyse',
  manual: 'Manuell',
  manual_combined: 'Manuell (kombiniert)',
  none: 'Keine Korrektur',
}

/** Keys of the four automatic shear-detection methods (labels A–D above). */
const SHEAR_METHOD_KEYS = ['vertical_edge', 'projection', 'hough_lines', 'text_lines'] as const
|
||||
|
||||
/**
 * Maps a confidence value (0-1) to Tailwind text-colour classes:
 * green from 0.7, yellow from 0.5, gray below that.
 */
function confColor(conf: number): string {
  return conf >= 0.7
    ? 'text-green-600 dark:text-green-400'
    : conf >= 0.5
      ? 'text-yellow-600 dark:text-yellow-400'
      : 'text-gray-400'
}
|
||||
|
||||
/**
 * Small horizontal bar plus percentage label visualising a confidence value (0-1).
 * The bar is green from 0.7, yellow from 0.5 and gray below that.
 */
function ConfBar({ value }: { value: number }) {
  const percent = Math.round(value * 100)

  let barColor = 'bg-gray-400'
  if (value >= 0.7) {
    barColor = 'bg-green-500'
  } else if (value >= 0.5) {
    barColor = 'bg-yellow-500'
  }

  return (
    <div className="flex items-center gap-1.5">
      <div className="w-16 h-1.5 bg-gray-200 dark:bg-gray-700 rounded-full overflow-hidden">
        <div className={`h-full rounded-full ${barColor}`} style={{ width: `${percent}%` }} />
      </div>
      <span className={`text-xs font-mono ${confColor(value)}`}>{percent}%</span>
    </div>
  )
}
|
||||
|
||||
/**
 * One labelled slider row used for fine-tuning, with an optional radio button
 * in front (rendered only when `radioName` is provided).
 *
 * The range input works on values multiplied by 100, so `min`, `max`, `step`
 * and `value` are scaled up for the input and the input's value is divided by
 * 100 again before it is passed to `onChange`.
 */
function FineTuneSlider({
  label,
  value,
  onChange,
  min,
  max,
  step,
  unit = '\u00B0',
  radioName,
  radioChecked,
  onRadioChange,
}: {
  label: string
  value: number
  onChange: (v: number) => void
  min: number
  max: number
  step: number
  unit?: string
  radioName?: string
  radioChecked?: boolean
  onRadioChange?: () => void
}) {
  const withRadio = radioName !== undefined
  const scaledMin = min * 100
  const scaledMax = max * 100
  const scaledStep = step * 100
  const scaledValue = Math.round(value * 100)
  // Non-negative values are shown with an explicit plus sign
  const signPrefix = value >= 0 ? '+' : ''

  return (
    <div className="flex items-center gap-2">
      {withRadio && (
        <input
          type="radio"
          name={radioName}
          checked={radioChecked}
          onChange={onRadioChange}
          className="w-3.5 h-3.5 accent-teal-500"
        />
      )}
      <span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">{label}</span>
      <span className="text-xs text-gray-400 w-8 text-right">{min}{unit}</span>
      <input
        type="range"
        min={scaledMin}
        max={scaledMax}
        step={scaledStep}
        value={scaledValue}
        onChange={(e) => onChange(parseInt(e.target.value) / 100)}
        className="flex-1 h-1.5 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
      />
      <span className="text-xs text-gray-400 w-8">+{max}{unit}</span>
      <span className="font-mono text-xs w-14 text-right tabular-nums">
        {signPrefix}{value.toFixed(2)}{unit}
      </span>
    </div>
  )
}
|
||||
|
||||
/**
 * Control panel for the dewarp (shear correction) step.
 *
 * Sections, all rendered only once `dewarpResult` is available:
 *  - summary banner (status, shear, method, confidence, per-detector details)
 *  - manual shear slider with an "Anwenden" button (`onManualDewarp`)
 *  - collapsible fine-tuning panel (only when `onCombinedAdjust` is provided)
 *    with three rotation sliders and one shear slider per entry of
 *    `SHEAR_METHOD_KEYS`, of which exactly one is selected via radio button
 *  - ground-truth feedback (hidden while the fine-tuning panel is open)
 *  - "next" button (`onNext`)
 */
export function DewarpControls({
  dewarpResult,
  deskewResult,
  showGrid,
  onToggleGrid,
  onManualDewarp,
  onCombinedAdjust,
  onGroundTruth,
  onNext,
  isApplying,
}: DewarpControlsProps) {
  // Degree sign. JSX text does NOT process JavaScript escape sequences, so a
  // raw "\u00B0" between tags is rendered literally. The symbol therefore has
  // to be inserted through an expression: {deg}.
  const deg = '\u00B0'

  const [manualShear, setManualShear] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)
  const [showDetails, setShowDetails] = useState(false)
  const [showFineTune, setShowFineTune] = useState(false)

  // Fine-tuning rotation sliders (3 passes)
  const [p1Iterative, setP1Iterative] = useState(0)
  const [p2Residual, setP2Residual] = useState(0)
  const [p3Textline, setP3Textline] = useState(0)

  // Fine-tuning shear sliders (4 methods) + selected method
  const [shearValues, setShearValues] = useState<Record<string, number>>({
    vertical_edge: 0,
    projection: 0,
    hough_lines: 0,
    text_lines: 0,
  })
  const [selectedShearMethod, setSelectedShearMethod] = useState<string>('vertical_edge')

  // Initialize slider to auto-detected value when result arrives
  useEffect(() => {
    if (dewarpResult && dewarpResult.shear_degrees !== undefined) {
      setManualShear(dewarpResult.shear_degrees)
    }
  }, [dewarpResult?.shear_degrees])

  // Initialize fine-tuning sliders from deskew result
  useEffect(() => {
    if (deskewResult) {
      setP1Iterative(deskewResult.angle_iterative ?? 0)
      setP2Residual(deskewResult.angle_residual ?? 0)
      setP3Textline(deskewResult.angle_textline ?? 0)
    }
  }, [deskewResult])

  // Initialize shear sliders from dewarp detections
  useEffect(() => {
    if (dewarpResult?.detections) {
      const newValues = { ...shearValues }
      let bestMethod = selectedShearMethod
      let bestConf = -1
      for (const d of dewarpResult.detections) {
        // Only detections for methods that have a slider are taken over.
        if (d.method in newValues) {
          newValues[d.method] = d.shear_degrees
          if (d.confidence > bestConf) {
            bestConf = d.confidence
            bestMethod = d.method
          }
        }
      }
      setShearValues(newValues)
      // Select the method that was actually used, or the highest confidence
      if (dewarpResult.method_used && dewarpResult.method_used in newValues) {
        setSelectedShearMethod(dewarpResult.method_used)
      } else {
        setSelectedShearMethod(bestMethod)
      }
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [dewarpResult?.detections])

  // Values passed to onCombinedAdjust: the three rotation passes are summed,
  // the shear is the value of the currently selected method.
  const rotationSum = p1Iterative + p2Residual + p3Textline
  const activeShear = shearValues[selectedShearMethod] ?? 0

  // "Ja" saves immediately; "Nein" only reveals the notes form, the actual
  // save happens in handleGroundTruthIncorrect.
  const handleGroundTruth = (isCorrect: boolean) => {
    setGtFeedback(isCorrect ? 'correct' : 'incorrect')
    if (isCorrect) {
      onGroundTruth({ is_correct: true })
      setGtSaved(true)
    }
  }

  const handleGroundTruthIncorrect = () => {
    onGroundTruth({
      is_correct: false,
      corrected_shear: manualShear !== 0 ? manualShear : undefined,
      notes: gtNotes || undefined,
    })
    setGtSaved(true)
  }

  const handleShearValueChange = (method: string, value: number) => {
    setShearValues((prev) => ({ ...prev, [method]: value }))
  }

  const handleFineTunePreview = () => {
    if (onCombinedAdjust) {
      onCombinedAdjust(rotationSum, activeShear)
    }
  }

  // Rejected: no method was applied although detectors produced results.
  const wasRejected = dewarpResult && dewarpResult.method_used === 'none' && (dewarpResult.detections || []).length > 0
  // Applied: an automatic (non-manual) method was used.
  const wasApplied = dewarpResult && dewarpResult.method_used !== 'none' && dewarpResult.method_used !== 'manual' && dewarpResult.method_used !== 'manual_combined'
  const detections = dewarpResult?.detections || []

  return (
    <div className="space-y-4">
      {/* Summary banner */}
      {dewarpResult && (
        <div className={`rounded-lg border p-4 ${
          wasRejected
            ? 'bg-amber-50 border-amber-200 dark:bg-amber-900/20 dark:border-amber-700'
            : wasApplied
              ? 'bg-green-50 border-green-200 dark:bg-green-900/20 dark:border-green-700'
              : 'bg-white border-gray-200 dark:bg-gray-800 dark:border-gray-700'
        }`}>
          {/* Status line */}
          <div className="flex items-center gap-2 mb-3">
            <span className="text-lg">
              {wasRejected ? '\u26A0\uFE0F' : wasApplied ? '\u2705' : '\u2796'}
            </span>
            <span className="text-sm font-medium text-gray-800 dark:text-gray-200">
              {wasRejected
                ? 'Quality Gate: Korrektur verworfen (Projektion nicht verbessert)'
                : wasApplied
                  ? `Korrektur angewendet: ${dewarpResult.shear_degrees.toFixed(2)}\u00B0`
                  : dewarpResult.method_used === 'manual' || dewarpResult.method_used === 'manual_combined'
                    ? `Manuelle Korrektur: ${dewarpResult.shear_degrees.toFixed(2)}\u00B0`
                    : 'Keine Korrektur noetig'}
            </span>
          </div>

          {/* Key metrics */}
          <div className="flex flex-wrap items-center gap-4 text-sm">
            <div>
              <span className="text-gray-500">Scherung:</span>{' '}
              <span className="font-mono font-medium">{dewarpResult.shear_degrees.toFixed(2)}{deg}</span>
            </div>
            <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
            <div>
              <span className="text-gray-500">Methode:</span>{' '}
              <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
                {dewarpResult.method_used.includes('+')
                  ? `Ensemble (${dewarpResult.method_used.split('+').map(m => METHOD_LABELS[m] || m).join(' + ')})`
                  : METHOD_LABELS[dewarpResult.method_used] || dewarpResult.method_used}
              </span>
            </div>
            <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
            <div className="flex items-center gap-1.5">
              <span className="text-gray-500">Konfidenz:</span>
              <ConfBar value={dewarpResult.confidence} />
            </div>
          </div>

          {/* Toggles row */}
          <div className="flex gap-2 mt-3">
            <button
              onClick={onToggleGrid}
              className={`text-xs px-3 py-1 rounded-full border transition-colors ${
                showGrid
                  ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
                  : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
              }`}
            >
              Raster
            </button>
            {detections.length > 0 && (
              <button
                onClick={() => setShowDetails(v => !v)}
                className={`text-xs px-3 py-1 rounded-full border transition-colors ${
                  showDetails
                    ? 'bg-blue-100 border-blue-300 text-blue-700 dark:bg-blue-900/40 dark:border-blue-600 dark:text-blue-300'
                    : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
                }`}
              >
                Details ({detections.length} Methoden)
              </button>
            )}
          </div>

          {/* Detailed detections */}
          {showDetails && detections.length > 0 && (
            <div className="mt-3 pt-3 border-t border-gray-200 dark:border-gray-700">
              <div className="text-xs text-gray-500 mb-2">Einzelne Detektoren:</div>
              <div className="space-y-1.5">
                {detections.map((d: DewarpDetection) => {
                  const isUsed = dewarpResult.method_used.includes(d.method)
                  const aboveThreshold = d.confidence >= 0.5
                  return (
                    <div
                      key={d.method}
                      className={`flex items-center gap-3 text-xs px-2 py-1.5 rounded ${
                        isUsed
                          ? 'bg-teal-50 dark:bg-teal-900/20'
                          : 'bg-gray-50 dark:bg-gray-800'
                      }`}
                    >
                      <span className="w-4 text-center">
                        {isUsed ? '\u2713' : aboveThreshold ? '\u2012' : '\u2717'}
                      </span>
                      <span className={`w-40 ${isUsed ? 'font-medium text-gray-800 dark:text-gray-200' : 'text-gray-500'}`}>
                        {METHOD_LABELS[d.method] || d.method}
                      </span>
                      <span className="font-mono w-16 text-right">
                        {d.shear_degrees.toFixed(2)}{deg}
                      </span>
                      <ConfBar value={d.confidence} />
                      {!aboveThreshold && (
                        <span className="text-gray-400 ml-1">(unter Schwelle)</span>
                      )}
                    </div>
                  )
                })}
              </div>
              {wasRejected && (
                <div className="mt-2 text-xs text-amber-600 dark:text-amber-400">
                  Die Korrektur wurde verworfen, weil die horizontale Projektions-Varianz nach Anwendung nicht besser war als vorher.
                </div>
              )}
            </div>
          )}
        </div>
      )}

      {/* Manual shear angle slider */}
      {dewarpResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Scherwinkel (manuell)</div>
          <div className="flex items-center gap-3">
            <span className="text-xs text-gray-400 w-10 text-right">-2.0{deg}</span>
            {/* The slider works on hundredths of a degree: -200..200 in steps of 5. */}
            <input
              type="range"
              min={-200}
              max={200}
              step={5}
              value={Math.round(manualShear * 100)}
              onChange={(e) => setManualShear(parseInt(e.target.value) / 100)}
              className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
            />
            <span className="text-xs text-gray-400 w-10">+2.0{deg}</span>
            <span className="font-mono text-sm w-16 text-right">{manualShear.toFixed(2)}{deg}</span>
            <button
              onClick={() => onManualDewarp(manualShear)}
              disabled={isApplying}
              className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
            >
              {isApplying ? '...' : 'Anwenden'}
            </button>
          </div>
          <p className="text-xs text-gray-400 mt-1">
            Scherung der vertikalen Achse in Grad. Positiv = Spalten nach rechts kippen, negativ = nach links.
          </p>
        </div>
      )}

      {/* Fine-tuning panel */}
      {dewarpResult && onCombinedAdjust && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
          <button
            onClick={() => setShowFineTune(v => !v)}
            className="w-full flex items-center justify-between p-4 text-left"
          >
            <div className="flex items-center gap-2">
              <span className="text-sm">⚙️</span>
              <span className="text-sm font-medium text-gray-700 dark:text-gray-300">Feinabstimmung</span>
              <span className="text-xs text-gray-400">(7 Regler)</span>
            </div>
            <span className="text-gray-400 text-sm">{showFineTune ? '\u25B2' : '\u25BC'}</span>
          </button>

          {showFineTune && (
            <div className="px-4 pb-4 space-y-5">
              {/* Rotation section */}
              <div>
                <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider mb-2">
                  Rotation (Begradigung)
                </div>
                <div className="space-y-2">
                  <FineTuneSlider
                    label="P1 Iterative Projection"
                    value={p1Iterative}
                    onChange={setP1Iterative}
                    min={-5}
                    max={5}
                    step={0.05}
                  />
                  <FineTuneSlider
                    label="P2 Word-Alignment"
                    value={p2Residual}
                    onChange={setP2Residual}
                    min={-3}
                    max={3}
                    step={0.05}
                  />
                  <FineTuneSlider
                    label="P3 Textline-Regression"
                    value={p3Textline}
                    onChange={setP3Textline}
                    min={-3}
                    max={3}
                    step={0.05}
                  />
                  <div className="flex items-center gap-2 pt-1 border-t border-gray-100 dark:border-gray-700">
                    <span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">Summe Rotation</span>
                    <span className="font-mono text-sm font-medium text-teal-600 dark:text-teal-400">
                      {rotationSum >= 0 ? '+' : ''}{rotationSum.toFixed(2)}{deg}
                    </span>
                  </div>
                </div>
              </div>

              {/* Shear section */}
              <div>
                <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider mb-2">
                  Scherung (Entzerrung) — einen Wert waehlen
                </div>
                <div className="space-y-2">
                  {SHEAR_METHOD_KEYS.map((method) => (
                    <FineTuneSlider
                      key={method}
                      label={METHOD_LABELS[method] || method}
                      value={shearValues[method]}
                      onChange={(v) => handleShearValueChange(method, v)}
                      min={-5}
                      max={5}
                      step={0.05}
                      radioName="shear-method"
                      radioChecked={selectedShearMethod === method}
                      onRadioChange={() => setSelectedShearMethod(method)}
                    />
                  ))}
                  <div className="flex items-center gap-2 pt-1 border-t border-gray-100 dark:border-gray-700">
                    <span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">Gewaehlte Scherung</span>
                    <span className="font-mono text-sm font-medium text-teal-600 dark:text-teal-400">
                      {activeShear >= 0 ? '+' : ''}{activeShear.toFixed(2)}{deg}
                    </span>
                    <span className="text-xs text-gray-400 ml-1">
                      ({METHOD_LABELS[selectedShearMethod]})
                    </span>
                  </div>
                </div>
              </div>

              {/* Preview + Save */}
              <div className="flex items-center gap-3 pt-2">
                <button
                  onClick={handleFineTunePreview}
                  disabled={isApplying}
                  className="px-4 py-2 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
                >
                  {isApplying ? 'Wird angewendet...' : 'Vorschau'}
                </button>
                <button
                  onClick={() => {
                    onGroundTruth({
                      is_correct: false,
                      corrected_shear: activeShear,
                      notes: `Fine-tuned: rotation=${rotationSum.toFixed(3)}, shear=${activeShear.toFixed(3)} (${selectedShearMethod})`,
                    })
                    setGtSaved(true)
                  }}
                  disabled={gtSaved}
                  className="px-4 py-2 text-sm bg-blue-600 text-white rounded-md hover:bg-blue-700 disabled:opacity-50 transition-colors"
                >
                  {gtSaved ? 'Gespeichert' : 'Als Ground Truth speichern'}
                </button>
                <span className="text-xs text-gray-400">
                  Rotation: {rotationSum >= 0 ? '+' : ''}{rotationSum.toFixed(2)}{deg} + Scherung: {activeShear >= 0 ? '+' : ''}{activeShear.toFixed(2)}{deg}
                </span>
              </div>
            </div>
          )}
        </div>
      )}

      {/* Ground Truth */}
      {dewarpResult && !showFineTune && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
            Spalten vertikal ausgerichtet?
          </div>
          <p className="text-xs text-gray-400 mb-2">Pruefen ob die Spaltenraender jetzt senkrecht zum Raster stehen.</p>
          {!gtSaved ? (
            <div className="space-y-3">
              <div className="flex gap-2">
                <button
                  onClick={() => handleGroundTruth(true)}
                  className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                    gtFeedback === 'correct'
                      ? 'bg-green-100 text-green-700 ring-2 ring-green-400'
                      : 'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300'
                  }`}
                >
                  Ja
                </button>
                <button
                  onClick={() => handleGroundTruth(false)}
                  className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                    gtFeedback === 'incorrect'
                      ? 'bg-red-100 text-red-700 ring-2 ring-red-400'
                      : 'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300'
                  }`}
                >
                  Nein
                </button>
              </div>
              {gtFeedback === 'incorrect' && (
                <div className="space-y-2">
                  <textarea
                    value={gtNotes}
                    onChange={(e) => setGtNotes(e.target.value)}
                    placeholder="Notizen zur Korrektur..."
                    className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
                    rows={2}
                  />
                  <button
                    onClick={handleGroundTruthIncorrect}
                    className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
                  >
                    Feedback speichern
                  </button>
                </div>
              )}
            </div>
          ) : (
            <div className="text-sm text-green-600 dark:text-green-400">
              Feedback gespeichert
            </div>
          )}
        </div>
      )}

      {/* Next button */}
      {dewarpResult && (
        <div className="flex justify-end">
          <button
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Uebernehmen & Weiter →
          </button>
        </div>
      )}
    </div>
  )
}
|
||||
@@ -1,403 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Path prefix prepended to every OCR-pipeline request URL built in this file. */
const KLAUSUR_API = '/klausur-api'

/**
 * Column type → colour mapping (hex values; the Tailwind colour each one
 * corresponds to is noted per entry). The insertion order is also the order
 * in which the legend lists the entries.
 */
const COL_TYPE_COLORS: Record<string, string> = {
  // blue-500
  column_en: '#3b82f6',
  // green-500
  column_de: '#22c55e',
  // orange-500
  column_example: '#f97316',
  // purple-500
  column_text: '#a855f7',
  // cyan-500
  page_ref: '#06b6d4',
  // gray-500
  column_marker: '#6b7280',
}
|
||||
|
||||
/** Props accepted by the FabricReconstructionCanvas component. */
interface FabricReconstructionCanvasProps {
  /** Session identifier; interpolated into the image and export URLs. */
  sessionId: string
  /** Grid cells that are placed on the canvas as editable text objects. */
  cells: GridCell[]
  /** Receives `{ cell_id, text }` entries whenever cell text is changed on the canvas. */
  onCellsChanged: (updates: { cell_id: string; text: string }[]) => void
}
|
||||
|
||||
// Fabric.js types (subset used here)

/** Structural type for the parts of a Fabric.js canvas this file declares. */
interface FabricCanvas {
  // --- object management ---
  add: (...objects: FabricObject[]) => FabricCanvas
  remove: (...objects: FabricObject[]) => FabricCanvas
  getObjects: () => FabricObject[]

  // --- selection ---
  getActiveObject: () => FabricObject | null
  discardActiveObject: () => FabricCanvas

  // --- rendering ---
  renderAll: () => void
  requestRenderAll: () => void

  // --- geometry / viewport ---
  setWidth: (w: number) => void
  setHeight: (h: number) => void
  setZoom: (z: number) => void
  getZoom: () => number

  // --- background, events, lifecycle ---
  setBackgroundImage: (img: FabricImage, callback: () => void) => void
  on: (event: string, handler: (e: FabricEvent) => void) => void
  dispose: () => void
}
|
||||
|
||||
/** Structural type for the parts of a Fabric.js object this file declares. */
interface FabricObject {
  type?: string

  // Position and size.
  left?: number
  top?: number
  width?: number
  height?: number

  /** Text content (read from text objects in the change handlers). */
  text?: string
  selectable?: boolean

  /** Custom payload; this file stores cell metadata such as `cellId` here. */
  data?: Record<string, unknown>

  set: (props: Record<string, unknown>) => FabricObject
  get: (prop: string) => unknown
  on?: (event: string, handler: () => void) => void
  setCoords?: () => void
}
|
||||
|
||||
/**
 * Image object. In addition to the FabricObject members it exposes the scale
 * factors, which are multiplied with width/height to size the canvas.
 */
interface FabricImage extends FabricObject {
  width?: number
  height?: number
  scaleX?: number
  scaleY?: number
}
|
||||
|
||||
/** Payload handed to canvas event handlers registered through `on`. */
interface FabricEvent {
  /** Object the event refers to, if any. */
  target?: FabricObject
  /** Underlying DOM mouse event, if any. */
  e?: MouseEvent
}
|
||||
|
||||
// The dynamically imported 'fabric' module is deliberately left untyped.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type FabricModule = any
|
||||
|
||||
/**
 * Returns true when a keyboard event target is an element the user types
 * into (textarea, content-editable element, or a text-like input). Global
 * shortcuts must not fire while such an element has focus.
 */
function isTextEntryTarget(target: EventTarget | null): boolean {
  if (!(target instanceof HTMLElement)) return false
  if (target.isContentEditable) return true
  if (target instanceof HTMLTextAreaElement) return true
  if (target instanceof HTMLInputElement) {
    // Sliders, buttons and toggles do not consume text-editing keys.
    return !['range', 'button', 'checkbox', 'radio', 'submit', 'reset'].includes(target.type)
  }
  return false
}

/**
 * Fabric.js based editor for the reconstructed page.
 *
 * Loads the cropped session image as canvas background, places one editable
 * text object per grid cell on top of it and reports text edits through
 * `onCellsChanged`. A toolbar offers background opacity, zoom and PDF/DOCX
 * export; keyboard shortcuts provide undo/redo (Cmd/Ctrl+Z, with Shift for
 * redo) and removal of the selected cell (Delete/Backspace).
 *
 * @param sessionId      Session whose image is loaded; the canvas is rebuilt when it changes.
 * @param cells          Grid cells rendered as text objects (read once per canvas build).
 * @param onCellsChanged Called with `{ cell_id, text }` updates after edits, undo and redo.
 */
export function FabricReconstructionCanvas({
  sessionId,
  cells,
  onCellsChanged,
}: FabricReconstructionCanvasProps) {
  const canvasElRef = useRef<HTMLCanvasElement>(null)
  const fabricRef = useRef<FabricCanvas | null>(null)
  const fabricModuleRef = useRef<FabricModule>(null)
  const [ready, setReady] = useState(false)
  const [opacity, setOpacity] = useState(30)
  const [zoom, setZoom] = useState(100)
  const [selectedCell, setSelectedCell] = useState<string | null>(null)
  const [error, setError] = useState('')

  // Canvas size at 100 % zoom; needed to resize the canvas element on zoom.
  const baseSizeRef = useRef<{ width: number; height: number } | null>(null)

  // Always points at the latest callback, so handlers registered once per
  // session (see the init effect) never call a stale closure.
  const onCellsChangedRef = useRef(onCellsChanged)
  useEffect(() => {
    onCellsChangedRef.current = onCellsChanged
  }, [onCellsChanged])

  // Undo/Redo
  const undoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])
  const redoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])

  // ---- Initialise Fabric.js ----
  useEffect(() => {
    let disposed = false

    async function init() {
      try {
        const fabricModule = await import('fabric')
        if (disposed) return
        fabricModuleRef.current = fabricModule

        const canvasEl = canvasElRef.current
        if (!canvasEl) return

        // History entries refer to objects of the previous canvas.
        undoStackRef.current = []
        redoStackRef.current = []

        // Load background image first to get dimensions
        const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`

        const bgImg = await fabricModule.FabricImage.fromURL(imgUrl, { crossOrigin: 'anonymous' }) as FabricImage

        if (disposed) return

        const imgW = (bgImg.width || 800) * (bgImg.scaleX || 1)
        const imgH = (bgImg.height || 600) * (bgImg.scaleY || 1)
        baseSizeRef.current = { width: imgW, height: imgH }

        bgImg.set({ opacity: opacity / 100, selectable: false, evented: false } as Record<string, unknown>)

        const canvas = new fabricModule.Canvas(canvasEl, {
          width: imgW,
          height: imgH,
          selection: true,
          preserveObjectStacking: true,
          backgroundImage: bgImg,
        }) as unknown as FabricCanvas

        fabricRef.current = canvas
        canvas.renderAll()

        // Add cell objects
        addCellObjects(canvas, fabricModule, cells, imgW, imgH)

        // Listen for text changes
        canvas.on('object:modified', (e: FabricEvent) => {
          const target = e.target
          const data = target?.data
          if (!target || !data?.cellId) return
          const cellId = data.cellId as string
          const newText = (target.text || '') as string
          // Record an undo step only when the text actually changed; the
          // event also fires for moves and resizes.
          const oldText = (data.currentText ?? data.originalText ?? '') as string
          if (newText !== oldText) {
            undoStackRef.current.push({ cellId, oldText, newText })
            redoStackRef.current = []
            data.currentText = newText
          }
          onCellsChangedRef.current([{ cell_id: cellId, text: newText }])
        })

        // Selection tracking
        canvas.on('selection:created', (e: FabricEvent) => {
          if (e.target?.data?.cellId) setSelectedCell(e.target.data.cellId as string)
        })
        canvas.on('selection:updated', (e: FabricEvent) => {
          if (e.target?.data?.cellId) setSelectedCell(e.target.data.cellId as string)
        })
        canvas.on('selection:cleared', () => setSelectedCell(null))

        setReady(true)
      } catch (err) {
        if (!disposed) setError(err instanceof Error ? err.message : 'Fabric.js konnte nicht geladen werden')
      }
    }

    init()

    return () => {
      disposed = true
      fabricRef.current?.dispose()
      fabricRef.current = null
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /** Creates one editable text object per grid cell and adds it to the canvas. */
  function addCellObjects(
    canvas: FabricCanvas,
    fabricModule: FabricModule,
    gridCells: GridCell[],
    imgW: number,
    imgH: number,
  ) {
    for (const cell of gridCells) {
      const color = COL_TYPE_COLORS[cell.col_type] || '#6b7280'
      // bbox_pct values are percentages of the image size.
      const x = (cell.bbox_pct.x / 100) * imgW
      const y = (cell.bbox_pct.y / 100) * imgH
      const w = (cell.bbox_pct.w / 100) * imgW
      const h = (cell.bbox_pct.h / 100) * imgH

      // Font size follows the cell height, clamped to 8..18.
      const fontSize = Math.max(8, Math.min(18, h * 0.55))

      const textObj = new fabricModule.IText(cell.text || '', {
        left: x,
        top: y,
        width: w,
        fontSize,
        fontFamily: 'monospace',
        fill: '#000000',
        backgroundColor: `${color}22`,
        padding: 2,
        editable: true,
        selectable: true,
        lockScalingFlip: true,
        data: {
          cellId: cell.cell_id,
          colType: cell.col_type,
          rowIndex: cell.row_index,
          colIndex: cell.col_index,
          originalText: cell.text,
          // Last text known to the undo history; starts as the displayed text.
          currentText: cell.text || '',
        },
      })

      // Border colour matches column type
      textObj.set({
        borderColor: color,
        cornerColor: color,
        cornerSize: 6,
        transparentCorners: false,
      } as Record<string, unknown>)

      canvas.add(textObj)
    }
    canvas.renderAll()
  }

  // ---- Opacity slider ----
  const handleOpacityChange = useCallback((val: number) => {
    setOpacity(val)
    const canvas = fabricRef.current
    if (!canvas) return
    // Fabric v6: backgroundImage is a direct property on the canvas
    const bgImg = (canvas as unknown as { backgroundImage?: FabricObject }).backgroundImage
    if (bgImg) {
      bgImg.set({ opacity: val / 100 })
      canvas.renderAll()
    }
  }, [])

  // ---- Zoom ----
  const handleZoomChange = useCallback((val: number) => {
    setZoom(val)
    const canvas = fabricRef.current
    if (!canvas) return
    const factor = val / 100
    // Assigning a `zoom` property has no effect; the zoom level has to be
    // applied through setZoom().
    canvas.setZoom(factor)
    // Resize the canvas element as well so the zoomed page is not clipped.
    const base = baseSizeRef.current
    if (base) {
      ;(canvas as unknown as {
        setDimensions: (size: { width: number; height: number }) => void
      }).setDimensions({ width: base.width * factor, height: base.height * factor })
    }
    canvas.requestRenderAll()
  }, [])

  // ---- Undo / Redo via keyboard ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      // With Shift held the key is reported as 'Z' on many platforms, so the
      // comparison has to be case-insensitive or redo is unreachable.
      if (!(e.metaKey || e.ctrlKey) || e.key.toLowerCase() !== 'z') return
      // Leave native undo alone while the user types somewhere (this includes
      // the textarea Fabric uses while a cell is in editing mode).
      if (isTextEntryTarget(e.target)) return

      const canvas = fabricRef.current
      if (!canvas) return
      e.preventDefault()

      // Writes `text` into the cell object and notifies the parent.
      const applyText = (cellId: string, text: string) => {
        const obj = canvas.getObjects().find(
          (o: FabricObject) => o.data?.cellId === cellId
        )
        if (!obj) return
        obj.set({ text } as Record<string, unknown>)
        if (obj.data) obj.data.currentText = text
        canvas.renderAll()
        onCellsChangedRef.current([{ cell_id: cellId, text }])
      }

      if (e.shiftKey) {
        // Redo
        const action = redoStackRef.current.pop()
        if (!action) return
        undoStackRef.current.push(action)
        applyText(action.cellId, action.newText)
      } else {
        // Undo
        const action = undoStackRef.current.pop()
        if (!action) return
        redoStackRef.current.push(action)
        applyText(action.cellId, action.oldText)
      }
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [])

  // ---- Delete selected cell (via context-menu or Delete key) ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.key !== 'Delete' && e.key !== 'Backspace') return
      // Do not remove cells while the user is typing in a form field.
      if (isTextEntryTarget(e.target)) return
      // Only delete if not currently editing text inside an IText
      const canvas = fabricRef.current
      if (!canvas) return
      const active = canvas.getActiveObject()
      if (!active) return
      // If the IText is in editing mode, let the keypress pass through
      if ((active as unknown as Record<string, boolean>).isEditing) return
      e.preventDefault()
      canvas.remove(active)
      canvas.discardActiveObject()
      canvas.renderAll()
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [])

  // ---- Export helpers ----
  const handleExportPdf = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/pdf`,
      '_blank'
    )
  }, [sessionId])

  const handleExportDocx = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/docx`,
      '_blank'
    )
  }, [sessionId])

  if (error) {
    return (
      <div className="flex flex-col items-center justify-center py-8 text-red-500 text-sm">
        <p>Fabric.js Editor konnte nicht geladen werden:</p>
        <p className="text-xs mt-1 text-gray-400">{error}</p>
      </div>
    )
  }

  return (
    <div className="space-y-2">
      {/* Toolbar */}
      <div className="flex items-center gap-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2 text-xs">
        {/* Opacity slider */}
        <label className="flex items-center gap-1.5 text-gray-500">
          Hintergrund
          <input
            type="range"
            min={0} max={100}
            value={opacity}
            onChange={e => handleOpacityChange(Number(e.target.value))}
            className="w-20 h-1 accent-teal-500"
          />
          <span className="w-8 text-right">{opacity}%</span>
        </label>

        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />

        {/* Zoom — a plain container: inside a <label>, a click on the caption
            would activate the first button. */}
        <div className="flex items-center gap-1.5 text-gray-500">
          Zoom
          <button onClick={() => handleZoomChange(Math.max(25, zoom - 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            −
          </button>
          <span className="w-8 text-center">{zoom}%</span>
          <button onClick={() => handleZoomChange(Math.min(200, zoom + 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            +
          </button>
          <button onClick={() => handleZoomChange(100)}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            Fit
          </button>
        </div>

        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />

        {/* Selected cell info */}
        {selectedCell && (
          <span className="text-gray-400">
            Zelle: <span className="text-gray-600 dark:text-gray-300">{selectedCell}</span>
          </span>
        )}

        <div className="flex-1" />

        {/* Export buttons */}
        <button onClick={handleExportPdf}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          PDF
        </button>
        <button onClick={handleExportDocx}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          DOCX
        </button>
      </div>

      {/* Canvas */}
      <div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900"
        style={{ maxHeight: '75vh' }}>
        {!ready && (
          <div className="flex items-center justify-center py-12">
            <div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
            <span className="ml-2 text-sm text-gray-500">Canvas wird geladen...</span>
          </div>
        )}
        <canvas ref={canvasElRef} />
      </div>

      {/* Legend */}
      <div className="flex items-center gap-4 text-xs text-gray-500">
        {Object.entries(COL_TYPE_COLORS).map(([type, color]) => (
          <span key={type} className="flex items-center gap-1">
            <span className="w-3 h-3 rounded" style={{ backgroundColor: color + '44', border: `1px solid ${color}` }} />
            {type.replace('column_', '').replace('page_', '')}
          </span>
        ))}
        <span className="ml-auto text-gray-400">Doppelklick = Text bearbeiten | Delete = Zelle entfernen | Cmd+Z = Undo</span>
      </div>
    </div>
  )
}
|
||||
@@ -1,143 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
|
||||
// A4 page size in millimetres; the grid overlay spaces its lines relative to
// these two values.
const A4_WIDTH_MM = 210
const A4_HEIGHT_MM = 297
|
||||
|
||||
/** Props accepted by the ImageCompareView component. */
interface ImageCompareViewProps {
  // --- image sources ---
  originalUrl: string | null
  deskewedUrl: string | null
  /** Shown on the right instead of `deskewedUrl` when `showBinarized` is set and this is non-null. */
  binarizedUrl: string | null

  // --- display toggles ---
  showGrid: boolean
  showGridLeft?: boolean
  showBinarized: boolean

  // --- optional captions ---
  /** Heading of the left pane; a default caption is used when omitted. */
  leftLabel?: string
  rightLabel?: string
}
|
||||
|
||||
/**
 * Millimetre grid drawn as an absolutely positioned SVG overlay.
 *
 * The SVG uses a 0..100 coordinate space in both directions (viewBox with
 * `preserveAspectRatio="none"`), so every position is a percentage of the
 * A4 width or height. A line is emitted every 10 mm; every 50 mm the line is
 * drawn stronger and, except at 0, labelled with its millimetre value.
 * The overlay ignores pointer events.
 */
function MmGridOverlay() {
  const majorStroke = 'rgba(59, 130, 246, 0.4)'
  const minorStroke = 'rgba(59, 130, 246, 0.15)'
  const labelFill = 'rgba(59,130,246,0.6)'

  const gridNodes: React.ReactNode[] = []

  // Vertical lines every 10mm, labels along the top edge
  for (let mm = 0; mm <= A4_WIDTH_MM; mm += 10) {
    const xPct = (mm / A4_WIDTH_MM) * 100
    const isMajor = mm % 50 === 0

    gridNodes.push(
      <line
        key={`v-${mm}`}
        x1={xPct}
        y1={0}
        x2={xPct}
        y2={100}
        stroke={isMajor ? majorStroke : minorStroke}
        strokeWidth={isMajor ? 0.12 : 0.05}
      />
    )

    if (isMajor && mm > 0) {
      gridNodes.push(
        <text key={`vl-${mm}`} x={xPct} y={1.2} fill={labelFill} fontSize="1.2" textAnchor="middle">
          {mm}
        </text>
      )
    }
  }

  // Horizontal lines every 10mm, labels along the left edge
  for (let mm = 0; mm <= A4_HEIGHT_MM; mm += 10) {
    const yPct = (mm / A4_HEIGHT_MM) * 100
    const isMajor = mm % 50 === 0

    gridNodes.push(
      <line
        key={`h-${mm}`}
        x1={0}
        y1={yPct}
        x2={100}
        y2={yPct}
        stroke={isMajor ? majorStroke : minorStroke}
        strokeWidth={isMajor ? 0.12 : 0.05}
      />
    )

    if (isMajor && mm > 0) {
      gridNodes.push(
        <text key={`hl-${mm}`} x={0.5} y={yPct + 0.6} fill={labelFill} fontSize="1.2">
          {mm}
        </text>
      )
    }
  }

  return (
    <svg
      viewBox="0 0 100 100"
      preserveAspectRatio="none"
      className="absolute inset-0 w-full h-full pointer-events-none"
      style={{ zIndex: 10 }}
    >
      <g style={{ pointerEvents: 'none' }}>{gridNodes}</g>
    </svg>
  )
}
|
||||
|
||||
/**
 * Side-by-side view of two page images inside A4-proportioned frames.
 *
 * Left frame:  `originalUrl`, optionally with the mm grid (`showGridLeft`).
 * Right frame: `binarizedUrl` when `showBinarized` is set and a binarized URL
 *              exists, otherwise `deskewedUrl`; optionally with the mm grid
 *              (`showGrid`).
 *
 * `leftLabel` / `rightLabel` override the default headings; `rightLabel` is
 * also used for the right image's alt text and its "in progress" placeholder.
 *
 * Load failures are tracked per URL rather than as a sticky boolean: the
 * error placeholder is shown only while the URL that failed is still the one
 * being displayed. As soon as a different URL is supplied (e.g. toggling
 * between binarized and deskewed, or a new session image) the image is
 * attempted again instead of staying stuck on "Fehler beim Laden".
 */
export function ImageCompareView({
  originalUrl,
  deskewedUrl,
  showGrid,
  showGridLeft,
  showBinarized,
  binarizedUrl,
  leftLabel,
  rightLabel,
}: ImageCompareViewProps) {
  // The URL whose load failed last (null = no failure recorded yet).
  const [leftFailedUrl, setLeftFailedUrl] = useState<string | null>(null)
  const [rightFailedUrl, setRightFailedUrl] = useState<string | null>(null)

  const rightUrl = showBinarized && binarizedUrl ? binarizedUrl : deskewedUrl

  // An error only counts while the failing URL is still the current one.
  const leftError = !!originalUrl && leftFailedUrl === originalUrl
  const rightError = !!rightUrl && rightFailedUrl === rightUrl

  return (
    <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
      {/* Left: Original */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">{leftLabel || 'Original (unbearbeitet)'}</h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
             style={{ aspectRatio: '210/297' }}>
          {originalUrl && !leftError ? (
            <>
              <img
                src={originalUrl}
                alt="Original Scan"
                className="w-full h-full object-contain"
                onError={() => setLeftFailedUrl(originalUrl)}
              />
              {showGridLeft && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {leftError ? 'Fehler beim Laden' : 'Noch kein Bild'}
            </div>
          )}
        </div>
      </div>

      {/* Right: Deskewed with Grid */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">
          {rightLabel || `${showBinarized ? 'Binarisiert' : 'Begradigt'}${showGrid ? ' + Raster (mm)' : ''}`}
        </h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
             style={{ aspectRatio: '210/297' }}>
          {rightUrl && !rightError ? (
            <>
              <img
                src={rightUrl}
                alt={rightLabel || 'Bearbeitetes Bild'}
                className="w-full h-full object-contain"
                onError={() => setRightFailedUrl(rightUrl)}
              />
              {showGrid && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {rightError ? 'Fehler beim Laden' : `${rightLabel || 'Verarbeitung'} laeuft...`}
            </div>
          )}
        </div>
      </div>
    </div>
  )
}
|
||||
@@ -1,359 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { ColumnTypeKey, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const COLUMN_TYPES: { value: ColumnTypeKey; label: string }[] = [
|
||||
{ value: 'column_en', label: 'EN' },
|
||||
{ value: 'column_de', label: 'DE' },
|
||||
{ value: 'column_example', label: 'Beispiel' },
|
||||
{ value: 'column_text', label: 'Text' },
|
||||
{ value: 'page_ref', label: 'Seite' },
|
||||
{ value: 'column_marker', label: 'Marker' },
|
||||
{ value: 'column_ignore', label: 'Ignorieren' },
|
||||
]
|
||||
|
||||
const TYPE_OVERLAY_COLORS: Record<string, string> = {
|
||||
column_en: 'rgba(59, 130, 246, 0.12)',
|
||||
column_de: 'rgba(34, 197, 94, 0.12)',
|
||||
column_example: 'rgba(249, 115, 22, 0.12)',
|
||||
column_text: 'rgba(6, 182, 212, 0.12)',
|
||||
page_ref: 'rgba(168, 85, 247, 0.12)',
|
||||
column_marker: 'rgba(239, 68, 68, 0.12)',
|
||||
column_ignore: 'rgba(128, 128, 128, 0.06)',
|
||||
}
|
||||
|
||||
const TYPE_BADGE_COLORS: Record<string, string> = {
|
||||
column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
|
||||
column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
|
||||
column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
|
||||
column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
|
||||
page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
|
||||
column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
|
||||
column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
|
||||
}
|
||||
|
||||
// Default column type sequence for newly created columns
|
||||
const DEFAULT_TYPE_SEQUENCE: ColumnTypeKey[] = [
|
||||
'page_ref', 'column_en', 'column_de', 'column_example', 'column_text',
|
||||
]
|
||||
|
||||
const MIN_DIVIDER_DISTANCE_PERCENT = 2 // Minimum 2% apart
|
||||
|
||||
interface ManualColumnEditorProps {
  /** URL of the page image the dividers are placed on. */
  imageUrl: string
  /** Pixel width used to convert divider percentages into region x/width. */
  imageWidth: number
  /** Pixel height assigned to every generated region. */
  imageHeight: number
  /** Receives one region per column (left to right) when the user applies. */
  onApply: (columns: PageRegion[]) => void
  onCancel: () => void
  /** While true the apply button is disabled and shows a saving label. */
  applying: boolean
  /** Only changes the apply button label. */
  mode?: 'manual' | 'ground-truth'
  /** 'two-column' puts image and controls side by side, 'stacked' below each other. */
  layout?: 'two-column' | 'stacked'
  /** Initial divider positions in percent of the image width. */
  initialDividers?: number[]
  initialColumnTypes?: ColumnTypeKey[]
}

/**
 * Interactive editor for vertical column dividers on a page image.
 *
 * - Clicking the image inserts a divider (positions are percentages of the
 *   container width, kept sorted ascending in state).
 * - Dividers can be dragged; hovering reveals a delete button.
 * - Each resulting column gets a type via a <select>.
 * - "Apply" converts the dividers into `PageRegion`s spanning the full image
 *   height and passes them to `onApply`.
 *
 * Invariants maintained here:
 * - `dividers` is always sorted, so a divider's render index equals its state
 *   index. Dragging clamps the divider between its neighbours (keeping the
 *   minimum distance) instead of re-sorting, so the dragged index can never
 *   end up pointing at a different divider.
 * - Generated regions tile the image exactly: each region's right edge is the
 *   next region's `x` (edges are rounded once, widths are derived from them).
 */
export function ManualColumnEditor({
  imageUrl,
  imageWidth,
  imageHeight,
  onApply,
  onCancel,
  applying,
  mode = 'manual',
  layout = 'two-column',
  initialDividers,
  initialColumnTypes,
}: ManualColumnEditorProps) {
  const containerRef = useRef<HTMLDivElement>(null)
  // Set on mouseup after a drag so the click that the browser may dispatch
  // right afterwards (on the container, when the pointer was released away
  // from the handle) does not insert an unintended divider.
  const suppressClickRef = useRef(false)
  // Sort once up front: rendering and dragging both index into this array.
  const [dividers, setDividers] = useState<number[]>(
    () => [...(initialDividers ?? [])].sort((a, b) => a - b)
  )
  const [columnTypes, setColumnTypes] = useState<ColumnTypeKey[]>(initialColumnTypes ?? [])
  const [dragging, setDragging] = useState<number | null>(null)
  const [imageLoaded, setImageLoaded] = useState(false)

  const isGT = mode === 'ground-truth'

  // Keep columnTypes at exactly dividers.length + 1 entries: pad with the
  // default sequence (falling back to 'column_text'), or drop from the end.
  useEffect(() => {
    const numColumns = dividers.length + 1
    setColumnTypes(prev => {
      if (prev.length === numColumns) return prev
      const next = [...prev]
      while (next.length < numColumns) {
        const idx = next.length
        next.push(DEFAULT_TYPE_SEQUENCE[idx] || 'column_text')
      }
      while (next.length > numColumns) {
        next.pop()
      }
      return next
    })
  }, [dividers.length])

  /** Convert a viewport x coordinate into a 0..100 percentage of the container width. */
  const getXPercent = useCallback((clientX: number): number => {
    if (!containerRef.current) return 0
    const rect = containerRef.current.getBoundingClientRect()
    // A collapsed container would turn the division below into NaN/Infinity.
    if (rect.width === 0) return 0
    const pct = ((clientX - rect.left) / rect.width) * 100
    return Math.max(0, Math.min(100, pct))
  }, [])

  /** True if a new divider at xPct keeps the minimum distance to all others and to both edges. */
  const canPlaceDivider = useCallback((xPct: number, excludeIndex?: number): boolean => {
    for (let i = 0; i < dividers.length; i++) {
      if (i === excludeIndex) continue
      if (Math.abs(dividers[i] - xPct) < MIN_DIVIDER_DISTANCE_PERCENT) return false
    }
    return xPct > MIN_DIVIDER_DISTANCE_PERCENT && xPct < (100 - MIN_DIVIDER_DISTANCE_PERCENT)
  }, [dividers])

  // Click on image to add a divider
  const handleImageClick = useCallback((e: React.MouseEvent) => {
    if (dragging !== null) return
    // Swallow the click that directly follows the end of a drag.
    if (suppressClickRef.current) {
      suppressClickRef.current = false
      return
    }
    // Don't add if clicking on a divider handle
    if ((e.target as HTMLElement).dataset.divider) return

    const xPct = getXPercent(e.clientX)
    if (!canPlaceDivider(xPct)) return

    setDividers(prev => [...prev, xPct].sort((a, b) => a - b))
  }, [dragging, getXPercent, canPlaceDivider])

  // Drag handlers
  const handleDividerMouseDown = useCallback((e: React.MouseEvent, index: number) => {
    e.stopPropagation()
    e.preventDefault()
    suppressClickRef.current = false
    setDragging(index)
  }, [])

  useEffect(() => {
    if (dragging === null) return

    const handleMouseMove = (e: MouseEvent) => {
      const xPct = getXPercent(e.clientX)
      setDividers(prev => {
        if (dragging >= prev.length) return prev
        // Allowed range: between the neighbouring dividers (or the page
        // edges), each kept at least the minimum distance away. Clamping
        // preserves the sort order, so `dragging` stays valid.
        const lower = (dragging > 0 ? prev[dragging - 1] : 0) + MIN_DIVIDER_DISTANCE_PERCENT
        const upper = (dragging < prev.length - 1 ? prev[dragging + 1] : 100) - MIN_DIVIDER_DISTANCE_PERCENT
        if (lower > upper) return prev // no room to move at all
        const clamped = Math.min(upper, Math.max(lower, xPct))
        if (clamped === prev[dragging]) return prev
        const next = [...prev]
        next[dragging] = clamped
        return next
      })
    }

    const handleMouseUp = () => {
      suppressClickRef.current = true
      // If no click reaches the container (e.g. released outside of it),
      // clear the flag afterwards so the next genuine click is not lost.
      // NOTE(review): relies on the click being dispatched before a 0 ms timer runs.
      window.setTimeout(() => {
        suppressClickRef.current = false
      }, 0)
      setDragging(null)
    }

    window.addEventListener('mousemove', handleMouseMove)
    window.addEventListener('mouseup', handleMouseUp)
    return () => {
      window.removeEventListener('mousemove', handleMouseMove)
      window.removeEventListener('mouseup', handleMouseUp)
    }
  }, [dragging, getXPercent])

  const removeDivider = useCallback((index: number) => {
    setDividers(prev => prev.filter((_, i) => i !== index))
  }, [])

  const updateColumnType = useCallback((colIndex: number, type: ColumnTypeKey) => {
    setColumnTypes(prev => {
      const next = [...prev]
      next[colIndex] = type
      return next
    })
  }, [])

  const handleApply = useCallback(() => {
    // Build PageRegion array from dividers
    const sorted = [...dividers].sort((a, b) => a - b)
    const columns: PageRegion[] = []

    for (let i = 0; i <= sorted.length; i++) {
      const leftPct = i === 0 ? 0 : sorted[i - 1]
      const rightPct = i === sorted.length ? 100 : sorted[i]
      // Round the two edges and derive the width from them, so that
      // x + width of one region is exactly the x of the next one.
      const x = Math.round((leftPct / 100) * imageWidth)
      const right = Math.round((rightPct / 100) * imageWidth)

      columns.push({
        type: columnTypes[i] || 'column_text',
        x,
        y: 0,
        width: right - x,
        height: imageHeight,
        classification_confidence: 1.0,
        classification_method: 'manual',
      })
    }

    onApply(columns)
  }, [dividers, columnTypes, imageWidth, imageHeight, onApply])

  // Compute column regions for overlay
  const sorted = [...dividers].sort((a, b) => a - b)
  const columnRegions = Array.from({ length: sorted.length + 1 }, (_, i) => ({
    leftPct: i === 0 ? 0 : sorted[i - 1],
    rightPct: i === sorted.length ? 100 : sorted[i],
    type: columnTypes[i] || 'column_text',
  }))

  return (
    <div className="space-y-4">
      {/* Layout: image + controls */}
      <div className={layout === 'stacked' ? 'space-y-4' : 'grid grid-cols-2 gap-4'}>
        {/* Left: Interactive image */}
        <div>
          <div className="flex items-center justify-between mb-1">
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400">
              Klicken um Trennlinien zu setzen
            </div>
            <button
              onClick={onCancel}
              className="text-xs px-2 py-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200"
            >
              Abbrechen
            </button>
          </div>
          <div
            ref={containerRef}
            className="relative border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 cursor-crosshair select-none"
            onClick={handleImageClick}
          >
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              src={imageUrl}
              alt="Entzerrtes Bild"
              className="w-full h-auto block"
              draggable={false}
              onLoad={() => setImageLoaded(true)}
            />

            {imageLoaded && (
              <>
                {/* Column overlays */}
                {columnRegions.map((region, i) => (
                  <div
                    key={`col-${i}`}
                    className="absolute top-0 bottom-0 pointer-events-none"
                    style={{
                      left: `${region.leftPct}%`,
                      width: `${region.rightPct - region.leftPct}%`,
                      backgroundColor: TYPE_OVERLAY_COLORS[region.type] || 'rgba(128,128,128,0.08)',
                    }}
                  >
                    <span className="absolute top-1 left-1/2 -translate-x-1/2 text-[10px] font-medium text-gray-600 dark:text-gray-300 bg-white/80 dark:bg-gray-800/80 px-1 rounded">
                      {i + 1}
                    </span>
                  </div>
                ))}

                {/* Divider lines */}
                {sorted.map((xPct, i) => (
                  <div
                    key={`div-${i}`}
                    data-divider="true"
                    className="absolute top-0 bottom-0 group"
                    style={{
                      left: `${xPct}%`,
                      transform: 'translateX(-50%)',
                      width: '12px',
                      cursor: 'col-resize',
                      zIndex: 10,
                    }}
                    onMouseDown={(e) => handleDividerMouseDown(e, i)}
                  >
                    {/* Visible line */}
                    <div
                      data-divider="true"
                      className="absolute top-0 bottom-0 left-1/2 -translate-x-1/2 w-0.5 border-l-2 border-dashed border-red-500"
                    />
                    {/* Delete button */}
                    <button
                      data-divider="true"
                      onClick={(e) => {
                        e.stopPropagation()
                        removeDivider(i)
                      }}
                      className="absolute top-2 left-1/2 -translate-x-1/2 w-4 h-4 bg-red-500 text-white rounded-full text-[10px] leading-none flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity z-20"
                      title="Linie entfernen"
                    >
                      x
                    </button>
                  </div>
                ))}
              </>
            )}
          </div>
        </div>

        {/* Right: Column type assignment + actions */}
        <div className="space-y-4">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
            Spaltentypen
          </div>

          {dividers.length === 0 ? (
            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-6 text-center">
              <div className="text-3xl mb-2">👆</div>
              <p className="text-sm text-gray-500 dark:text-gray-400">
                Klicken Sie auf das Bild links, um vertikale Trennlinien zwischen den Spalten zu setzen.
              </p>
              <p className="text-xs text-gray-400 dark:text-gray-500 mt-2">
                Linien koennen per Drag verschoben und per Hover geloescht werden.
              </p>
            </div>
          ) : (
            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
              <div className="text-sm text-gray-600 dark:text-gray-400">
                <span className="font-medium text-gray-800 dark:text-gray-200">
                  {dividers.length} Linien = {dividers.length + 1} Spalten
                </span>
              </div>
              <div className="grid gap-2">
                {columnRegions.map((region, i) => (
                  <div key={i} className="flex items-center gap-3">
                    <span className={`w-16 text-center px-2 py-0.5 rounded text-xs font-medium ${TYPE_BADGE_COLORS[region.type] || 'bg-gray-100 text-gray-600'}`}>
                      Spalte {i + 1}
                    </span>
                    <select
                      value={columnTypes[i] || 'column_text'}
                      onChange={(e) => updateColumnType(i, e.target.value as ColumnTypeKey)}
                      className="text-sm border border-gray-200 dark:border-gray-600 rounded px-2 py-1 bg-white dark:bg-gray-700 text-gray-800 dark:text-gray-200"
                    >
                      {COLUMN_TYPES.map(t => (
                        <option key={t.value} value={t.value}>{t.label}</option>
                      ))}
                    </select>
                    <span className="text-xs text-gray-400 font-mono">
                      {Math.round(region.rightPct - region.leftPct)}%
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}

          {/* Action buttons */}
          <div className="flex flex-col gap-2">
            <button
              onClick={handleApply}
              disabled={dividers.length === 0 || applying}
              className="w-full px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed"
            >
              {applying
                ? 'Wird gespeichert...'
                : isGT
                  ? `${dividers.length + 1} Spalten als Ground Truth speichern`
                  : `${dividers.length + 1} Spalten uebernehmen`}
            </button>
            <button
              onClick={() => setDividers([])}
              disabled={dividers.length === 0}
              className="text-xs px-3 py-2 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 disabled:opacity-50"
            >
              Alle Linien entfernen
            </button>
          </div>
        </div>
      </div>
    </div>
  )
}
|
||||
@@ -1,115 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { PipelineStep, DocumentTypeResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const DOC_TYPE_LABELS: Record<string, string> = {
|
||||
vocab_table: 'Vokabeltabelle',
|
||||
full_text: 'Volltext',
|
||||
generic_table: 'Tabelle',
|
||||
}
|
||||
|
||||
interface PipelineStepperProps {
  /** Steps in display order. */
  steps: PipelineStep[]
  /** Index of the step currently shown as active. */
  currentStep: number
  onStepClick: (index: number) => void
  /** When given, completed steps show a hover button that calls this with the step index. */
  onReprocess?: (index: number) => void
  /** When set, a document-type row is rendered below the stepper. */
  docTypeResult?: DocumentTypeResult | null
  /** When given, the document type is editable via a <select>; otherwise it is shown as text. */
  onDocTypeChange?: (docType: DocumentTypeResult['doc_type']) => void
}

/**
 * Horizontal step indicator for the OCR pipeline plus an optional
 * document-type row.
 *
 * A step can be clicked when it is not skipped and is either completed or at
 * or before `currentStep`. Visual state precedence for the pill is
 * skipped > active > completed > failed > default; the icon uses
 * skipped > completed > failed > the step's own icon.
 */
export function PipelineStepper({
  steps,
  currentStep,
  onStepClick,
  onReprocess,
  docTypeResult,
  onDocTypeChange,
}: PipelineStepperProps) {
  // Colour of the connector drawn to the left of step `index`.
  const connectorTone = (index: number, skipped: boolean): string => {
    if (skipped) return 'bg-gray-200 dark:bg-gray-700 border-t border-dashed border-gray-400'
    if (index <= currentStep) return 'bg-teal-400'
    return 'bg-gray-300 dark:bg-gray-600'
  }

  // Pill colours; the first matching state wins.
  const pillTone = (skipped: boolean, active: boolean, completed: boolean, failed: boolean): string => {
    if (skipped) return 'bg-gray-100 text-gray-400 dark:bg-gray-800 dark:text-gray-600 line-through'
    if (active) return 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
    if (completed) return 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
    if (failed) return 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
    return 'text-gray-400 dark:text-gray-500'
  }

  const renderStep = (step: PipelineStep, index: number) => {
    const active = index === currentStep
    const completed = step.status === 'completed'
    const failed = step.status === 'failed'
    const skipped = step.status === 'skipped'
    const clickable = !skipped && (completed || index <= currentStep)

    let glyph = step.icon
    if (skipped) glyph = '-'
    else if (completed) glyph = '\u2713'
    else if (failed) glyph = '\u2717'

    const cursorTone = clickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'

    return (
      <div key={step.id} className="flex items-center">
        {index > 0 && (
          <div className={`h-0.5 w-8 mx-1 ${connectorTone(index, skipped)}`} />
        )}
        <div className="relative group">
          <button
            onClick={() => {
              if (clickable) onStepClick(index)
            }}
            disabled={!clickable}
            className={`flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all ${pillTone(skipped, active, completed, failed)} ${cursorTone}`}
          >
            <span className="text-base">{glyph}</span>
            <span className="hidden sm:inline">{step.name}</span>
            <span className="sm:hidden">{index + 1}</span>
          </button>
          {/* Reprocess button: only on completed steps, revealed on hover */}
          {completed && onReprocess && (
            <button
              onClick={(e) => {
                e.stopPropagation()
                onReprocess(index)
              }}
              className="absolute -top-1 -right-1 w-4 h-4 bg-orange-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
              title="Ab hier neu verarbeiten"
            >
              ↻
            </button>
          )}
        </div>
      </div>
    )
  }

  return (
    <div className="space-y-2">
      <div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
        {steps.map(renderStep)}
      </div>

      {/* Document type badge */}
      {docTypeResult && (
        <div className="flex items-center gap-2 px-4 py-2 bg-blue-50 dark:bg-blue-900/20 rounded-lg border border-blue-200 dark:border-blue-800 text-sm">
          <span className="text-blue-600 dark:text-blue-400 font-medium">
            Dokumenttyp:
          </span>
          {onDocTypeChange ? (
            <select
              value={docTypeResult.doc_type}
              onChange={(e) => onDocTypeChange(e.target.value as DocumentTypeResult['doc_type'])}
              className="bg-white dark:bg-gray-800 border border-blue-300 dark:border-blue-700 rounded px-2 py-0.5 text-sm text-blue-700 dark:text-blue-300"
            >
              <option value="vocab_table">Vokabeltabelle</option>
              <option value="generic_table">Tabelle (generisch)</option>
              <option value="full_text">Volltext</option>
            </select>
          ) : (
            <span className="text-blue-700 dark:text-blue-300">
              {DOC_TYPE_LABELS[docTypeResult.doc_type] || docTypeResult.doc_type}
            </span>
          )}
          <span className="text-blue-400 dark:text-blue-500 text-xs">
            ({Math.round(docTypeResult.confidence * 100)}% Konfidenz)
          </span>
        </div>
      )}
    </div>
  )
}
|
||||
@@ -1,432 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { ColumnResult, ColumnGroundTruth, PageRegion, SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { ColumnControls } from './ColumnControls'
|
||||
import { ManualColumnEditor } from './ManualColumnEditor'
|
||||
import type { ColumnTypeKey } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
type ViewMode = 'normal' | 'ground-truth' | 'manual'
|
||||
|
||||
interface StepColumnDetectionProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
onBoxSessionsCreated?: (subSessions: SubSession[]) => void
|
||||
}
|
||||
|
||||
/**
 * Derive ManualColumnEditor state from a list of column regions.
 *
 * Regions are ordered by their left edge (`x`). Every region except the
 * left-most contributes one divider at its left edge, expressed as a
 * percentage of `imageWidth`; `columnTypes` lists the region types in the
 * same left-to-right order. Returns empty arrays when there are no regions
 * or `imageWidth` is falsy (avoids dividing by zero).
 */
function columnsToEditorState(
  columns: PageRegion[],
  imageWidth: number
): { dividers: number[]; columnTypes: ColumnTypeKey[] } {
  const usable = columns.length > 0 && Boolean(imageWidth)
  if (!usable) {
    return { dividers: [], columnTypes: [] }
  }

  // Copy before sorting so the caller's array is left untouched.
  const leftToRight = columns.slice().sort((p, q) => p.x - q.x)

  const dividers: number[] = leftToRight
    .slice(1)
    .map(region => (region.x / imageWidth) * 100)
  const columnTypes: ColumnTypeKey[] = leftToRight.map(region => region.type)

  return { dividers, columnTypes }
}
|
||||
|
||||
export function StepColumnDetection({ sessionId, onNext, onBoxSessionsCreated }: StepColumnDetectionProps) {
|
||||
const [columnResult, setColumnResult] = useState<ColumnResult | null>(null)
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [viewMode, setViewMode] = useState<ViewMode>('normal')
|
||||
const [applying, setApplying] = useState(false)
|
||||
const [imageDimensions, setImageDimensions] = useState<{ width: number; height: number } | null>(null)
|
||||
const [savedGtColumns, setSavedGtColumns] = useState<PageRegion[] | null>(null)
|
||||
const [creatingBoxSessions, setCreatingBoxSessions] = useState(false)
|
||||
const [existingSubSessions, setExistingSubSessions] = useState<SubSession[] | null>(null)
|
||||
const [isSubSession, setIsSubSession] = useState(false)
|
||||
|
||||
// Fetch session info (image dimensions) + check for cached column result
|
||||
useEffect(() => {
|
||||
if (!sessionId || imageDimensions) return
|
||||
|
||||
const fetchSessionInfo = async () => {
|
||||
try {
|
||||
const infoRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||
if (infoRes.ok) {
|
||||
const info = await infoRes.json()
|
||||
if (info.image_width && info.image_height) {
|
||||
setImageDimensions({ width: info.image_width, height: info.image_height })
|
||||
}
|
||||
const isSub = !!info.parent_session_id
|
||||
setIsSubSession(isSub)
|
||||
if (info.sub_sessions && info.sub_sessions.length > 0) {
|
||||
setExistingSubSessions(info.sub_sessions)
|
||||
onBoxSessionsCreated?.(info.sub_sessions)
|
||||
}
|
||||
if (info.column_result) {
|
||||
setColumnResult(info.column_result)
|
||||
// Sub-session with pseudo-column already set → auto-advance
|
||||
if (isSub) {
|
||||
onNext()
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
// Sub-session without columns → auto-detect (creates pseudo-column)
|
||||
if (isSub) {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, { method: 'POST' })
|
||||
if (res.ok) {
|
||||
const data: ColumnResult = await res.json()
|
||||
setColumnResult(data)
|
||||
onNext()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to fetch session info:', e)
|
||||
}
|
||||
|
||||
// No cached result - run auto-detection
|
||||
runAutoDetection()
|
||||
}
|
||||
|
||||
fetchSessionInfo()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
// Load saved GT if exists
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
const fetchGt = async () => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`)
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
const corrected = data.columns_gt?.corrected_columns
|
||||
if (corrected) setSavedGtColumns(corrected)
|
||||
}
|
||||
} catch {
|
||||
// No saved GT - that's fine
|
||||
}
|
||||
}
|
||||
fetchGt()
|
||||
}, [sessionId])
|
||||
|
||||
const runAutoDetection = useCallback(async () => {
|
||||
if (!sessionId) return
|
||||
setDetecting(true)
|
||||
setError(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (!res.ok) {
|
||||
const err = await res.json().catch(() => ({ detail: res.statusText }))
|
||||
throw new Error(err.detail || 'Spaltenerkennung fehlgeschlagen')
|
||||
}
|
||||
const data: ColumnResult = await res.json()
|
||||
setColumnResult(data)
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setDetecting(false)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
const handleRerun = useCallback(() => {
|
||||
runAutoDetection()
|
||||
}, [runAutoDetection])
|
||||
|
||||
const handleGroundTruth = useCallback(async (gt: ColumnGroundTruth) => {
|
||||
if (!sessionId) return
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(gt),
|
||||
})
|
||||
} catch (e) {
|
||||
console.error('Ground truth save failed:', e)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
const handleManualApply = useCallback(async (columns: PageRegion[]) => {
|
||||
if (!sessionId) return
|
||||
setApplying(true)
|
||||
setError(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns/manual`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ columns }),
|
||||
})
|
||||
if (!res.ok) {
|
||||
const err = await res.json().catch(() => ({ detail: res.statusText }))
|
||||
throw new Error(err.detail || 'Manuelle Spalten konnten nicht gespeichert werden')
|
||||
}
|
||||
const data = await res.json()
|
||||
setColumnResult({
|
||||
columns: data.columns,
|
||||
duration_seconds: data.duration_seconds ?? 0,
|
||||
})
|
||||
setViewMode('normal')
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
|
||||
} finally {
|
||||
setApplying(false)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
const handleGtApply = useCallback(async (columns: PageRegion[]) => {
|
||||
if (!sessionId) return
|
||||
setApplying(true)
|
||||
setError(null)
|
||||
try {
|
||||
const gt: ColumnGroundTruth = {
|
||||
is_correct: false,
|
||||
corrected_columns: columns,
|
||||
}
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(gt),
|
||||
})
|
||||
setSavedGtColumns(columns)
|
||||
setViewMode('normal')
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
|
||||
} finally {
|
||||
setApplying(false)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
// Count box zones from column result
|
||||
const boxZones = columnResult?.zones?.filter(z => z.zone_type === 'box') || []
|
||||
const boxCount = boxZones.length
|
||||
|
||||
const createBoxSessions = useCallback(async () => {
|
||||
if (!sessionId) return
|
||||
setCreatingBoxSessions(true)
|
||||
setError(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/create-box-sessions`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (!res.ok) {
|
||||
const err = await res.json().catch(() => ({ detail: res.statusText }))
|
||||
throw new Error(err.detail || 'Box-Sessions konnten nicht erstellt werden')
|
||||
}
|
||||
const data = await res.json()
|
||||
const subs: SubSession[] = data.sub_sessions.map((s: { id: string; name?: string; box_index: number }) => ({
|
||||
id: s.id,
|
||||
name: s.name || `Box ${s.box_index + 1}`,
|
||||
box_index: s.box_index,
|
||||
current_step: 1,
|
||||
status: 'pending',
|
||||
}))
|
||||
setExistingSubSessions(subs)
|
||||
onBoxSessionsCreated?.(subs)
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Fehler beim Erstellen der Box-Sessions')
|
||||
} finally {
|
||||
setCreatingBoxSessions(false)
|
||||
}
|
||||
}, [sessionId, onBoxSessionsCreated])
|
||||
|
||||
if (!sessionId) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">📊</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 3: Spaltenerkennung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Bitte zuerst Schritt 1 und 2 abschliessen.
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/columns-overlay`
|
||||
|
||||
// Pre-compute editor state from saved GT or auto columns for GT mode
|
||||
const gtInitial = savedGtColumns
|
||||
? columnsToEditorState(savedGtColumns, imageDimensions?.width ?? 1000)
|
||||
: undefined
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Loading indicator */}
|
||||
{detecting && (
|
||||
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||
Spaltenerkennung laeuft...
|
||||
</div>
|
||||
)}
|
||||
|
||||
{viewMode === 'manual' ? (
|
||||
/* Manual column editor - overwrites column_result */
|
||||
<ManualColumnEditor
|
||||
imageUrl={dewarpedUrl}
|
||||
imageWidth={imageDimensions?.width ?? 1000}
|
||||
imageHeight={imageDimensions?.height ?? 1400}
|
||||
onApply={handleManualApply}
|
||||
onCancel={() => setViewMode('normal')}
|
||||
applying={applying}
|
||||
mode="manual"
|
||||
/>
|
||||
) : viewMode === 'ground-truth' ? (
|
||||
/* GT mode: auto result (left, readonly) + GT editor (right) */
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
{/* Left: Auto result (readonly overlay) */}
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Auto-Ergebnis (readonly)
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{columnResult ? (
|
||||
// eslint-disable-next-line @next/next/no-img-element
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Auto Spalten-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
) : (
|
||||
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||
Keine Auto-Daten
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{/* Auto column list */}
|
||||
{columnResult && (
|
||||
<div className="mt-2 space-y-1">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400">
|
||||
Auto: {columnResult.columns.length} Spalten
|
||||
</div>
|
||||
{columnResult.columns
|
||||
.filter(c => c.type.startsWith('column') || c.type === 'page_ref')
|
||||
.map((col, i) => (
|
||||
<div key={i} className="text-xs text-gray-500 dark:text-gray-400 font-mono">
|
||||
{i + 1}. {col.type} x={col.x} w={col.width}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Right: GT editor */}
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Ground Truth Editor
|
||||
</div>
|
||||
<ManualColumnEditor
|
||||
imageUrl={dewarpedUrl}
|
||||
imageWidth={imageDimensions?.width ?? 1000}
|
||||
imageHeight={imageDimensions?.height ?? 1400}
|
||||
onApply={handleGtApply}
|
||||
onCancel={() => setViewMode('normal')}
|
||||
applying={applying}
|
||||
mode="ground-truth"
|
||||
layout="stacked"
|
||||
initialDividers={gtInitial?.dividers}
|
||||
initialColumnTypes={gtInitial?.columnTypes}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
/* Normal mode: overlay (left) vs clean (right) */
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Mit Spalten-Overlay
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{columnResult ? (
|
||||
// eslint-disable-next-line @next/next/no-img-element
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Spalten-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
) : (
|
||||
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||
{detecting ? 'Erkenne Spalten...' : 'Keine Daten'}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Entzerrtes Bild
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Entzerrt"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Box zone info */}
|
||||
{viewMode === 'normal' && boxCount > 0 && (
|
||||
<div className="bg-amber-50 dark:bg-amber-900/20 border border-amber-200 dark:border-amber-700 rounded-xl p-4 flex items-center justify-between">
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="text-2xl">📦</span>
|
||||
<div>
|
||||
<div className="text-sm font-medium text-amber-800 dark:text-amber-300">
|
||||
{boxCount} Box{boxCount > 1 ? 'en' : ''} erkannt
|
||||
</div>
|
||||
<div className="text-xs text-amber-600 dark:text-amber-400">
|
||||
Box-Bereiche werden separat verarbeitet
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{existingSubSessions && existingSubSessions.length > 0 ? (
|
||||
<div className="text-xs text-amber-700 dark:text-amber-300 font-medium">
|
||||
{existingSubSessions.length} Box-Session{existingSubSessions.length > 1 ? 's' : ''} vorhanden
|
||||
</div>
|
||||
) : (
|
||||
<button
|
||||
onClick={createBoxSessions}
|
||||
disabled={creatingBoxSessions}
|
||||
className="px-4 py-2 bg-amber-600 text-white rounded-lg hover:bg-amber-700 transition-colors text-sm font-medium disabled:opacity-50 flex items-center gap-2"
|
||||
>
|
||||
{creatingBoxSessions && (
|
||||
<div className="animate-spin w-3.5 h-3.5 border-2 border-white border-t-transparent rounded-full" />
|
||||
)}
|
||||
Box-Sessions erstellen
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Controls */}
|
||||
{viewMode === 'normal' && (
|
||||
<ColumnControls
|
||||
columnResult={columnResult}
|
||||
onRerun={handleRerun}
|
||||
onManualMode={() => setViewMode('manual')}
|
||||
onGtMode={() => setViewMode('ground-truth')}
|
||||
onGroundTruth={handleGroundTruth}
|
||||
onNext={onNext}
|
||||
isDetecting={detecting}
|
||||
savedGtColumns={savedGtColumns}
|
||||
/>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
'use client'
|
||||
|
||||
/**
 * Placeholder panel for pipeline step 5 (coordinate assignment).
 *
 * Renders a static "coming soon" card; takes no props and holds no state.
 */
export function StepCoordinates() {
  // Class lists are kept as named constants so the markup below stays readable.
  const wrapperClasses = 'flex flex-col items-center justify-center py-16 text-center'
  const headingClasses = 'text-lg font-medium text-gray-700 dark:text-gray-300 mb-2'
  const descriptionClasses = 'text-gray-500 dark:text-gray-400 max-w-md'
  const badgeClasses =
    'mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium'

  return (
    <div className={wrapperClasses}>
      <div className="text-5xl mb-4">📍</div>
      <h3 className={headingClasses}>
        Schritt 5: Koordinatenzuweisung
      </h3>
      <p className={descriptionClasses}>
        Exakte Positionszuweisung fuer jedes Wort auf der Seite.
        Dieser Schritt wird in einer zukuenftigen Version implementiert.
      </p>
      <div className={badgeClasses}>
        Kommt bald
      </div>
    </div>
  )
}
|
||||
@@ -1,200 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { CropResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props accepted by the StepCrop component. */
interface StepCropProps {
|
||||
/** Id of the OCR pipeline session to crop; when null the step renders a "no session selected" hint. */
sessionId: string | null
|
||||
/** Called when the user continues to the next step (also called after a skip request). */
onNext: () => void
|
||||
}
|
||||
|
||||
/**
 * Crop step of the OCR pipeline.
 *
 * For the given session it first asks the backend whether a crop result is
 * already stored and reuses it; otherwise it triggers the crop endpoint.
 * The dewarped image and (once available) the cropped image are shown side
 * by side together with a summary of the crop result.
 *
 * @param sessionId Pipeline session id, or null when none is selected.
 * @param onNext    Invoked when the user continues (or skips) to the next step.
 */
export function StepCrop({ sessionId, onNext }: StepCropProps) {
  const [cropResult, setCropResult] = useState<CropResult | null>(null)
  const [cropping, setCropping] = useState(false)
  const [error, setError] = useState<string | null>(null)

  // Load or compute the crop result whenever the session changes.
  // The effect is keyed on sessionId alone (instead of a one-shot boolean),
  // so switching sessions while the step is mounted re-runs the crop, and the
  // cleanup flag keeps late responses of a superseded session from writing state.
  useEffect(() => {
    if (!sessionId) return

    let cancelled = false
    const sessionUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`

    const runCrop = async () => {
      // Drop whatever the previous session left behind.
      setCropResult(null)
      setCropping(true)
      setError(null)

      try {
        // Check if session already has crop result
        const sessionRes = await fetch(sessionUrl)
        if (cancelled) return
        if (sessionRes.ok) {
          const sessionData = await sessionRes.json()
          if (cancelled) return
          if (sessionData.crop_result) {
            setCropResult(sessionData.crop_result)
            return
          }
        }

        const res = await fetch(`${sessionUrl}/crop`, {
          method: 'POST',
        })

        if (!res.ok) {
          throw new Error('Zuschnitt fehlgeschlagen')
        }

        const data = await res.json()
        if (!cancelled) setCropResult(data)
      } catch (e) {
        if (!cancelled) setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        // Only the current run may clear the spinner.
        if (!cancelled) setCropping(false)
      }
    }

    runCrop()

    return () => {
      cancelled = true
    }
  }, [sessionId])

  // Ask the backend to skip cropping, then advance. Advancing is best-effort:
  // onNext() is called even when the skip request fails.
  const handleSkip = async () => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/crop/skip`, {
        method: 'POST',
      })
      if (res.ok) {
        const data = await res.json()
        setCropResult(data)
      } else {
        // Previously a non-OK response vanished without a trace.
        console.error('Skip crop failed:', res.status)
      }
    } catch (e) {
      console.error('Skip crop failed:', e)
    }
    onNext()
  }

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
  // The cropped image is only requested once a crop result exists.
  const croppedUrl = cropResult
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
    : null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {cropping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Scannerraender werden erkannt...
        </div>
      )}

      {/* Image comparison */}
      <ImageCompareView
        originalUrl={dewarpedUrl}
        deskewedUrl={croppedUrl}
        showGrid={false}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Entzerrt"
        rightLabel="Zugeschnitten"
      />

      {/* Crop result info */}
      {cropResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex flex-wrap items-center gap-3 text-sm">
            {/* multi_page / page_count / sub_sessions are read through a loose cast, as in the original */}
            {(cropResult as Record<string, unknown>).multi_page ? (
              <>
                <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-400 text-xs font-medium">
                  Mehrseitig: {(cropResult as Record<string, unknown>).page_count as number} Seiten erkannt
                </span>
                {((cropResult as Record<string, unknown>).sub_sessions as Array<{id: string; name: string; page_index: number}> | undefined)?.map((sub) => (
                  <span key={sub.id} className="text-gray-400 text-xs">
                    Seite {sub.page_index + 1}
                  </span>
                ))}
              </>
            ) : cropResult.crop_applied ? (
              <>
                <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
                  Zugeschnitten
                </span>
                {cropResult.detected_format && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-600 dark:text-gray-400">
                      Format: <span className="font-medium">{cropResult.detected_format}</span>
                      {cropResult.format_confidence != null && (
                        <span className="text-gray-400 ml-1">
                          ({Math.round(cropResult.format_confidence * 100)}%)
                        </span>
                      )}
                    </span>
                  </>
                )}
                {cropResult.original_size && cropResult.cropped_size && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-400 text-xs">
                      {cropResult.original_size.width}x{cropResult.original_size.height} → {cropResult.cropped_size.width}x{cropResult.cropped_size.height}
                    </span>
                  </>
                )}
                {cropResult.border_fractions && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-400 text-xs">
                      Raender: O={pct(cropResult.border_fractions.top)} U={pct(cropResult.border_fractions.bottom)} L={pct(cropResult.border_fractions.left)} R={pct(cropResult.border_fractions.right)}
                    </span>
                  </>
                )}
              </>
            ) : (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-400 text-xs font-medium">
                Kein Zuschnitt noetig
              </span>
            )}
            {cropResult.duration_seconds != null && (
              <span className="text-gray-400 text-xs ml-auto">
                {cropResult.duration_seconds}s
              </span>
            )}
          </div>
        </div>
      )}

      {/* Action buttons */}
      {cropResult && (
        <div className="flex justify-between">
          <button
            onClick={handleSkip}
            className="px-4 py-2 text-sm text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 transition-colors"
          >
            Ueberspringen
          </button>
          <button
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter →
          </button>
        </div>
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
|
||||
/**
 * Formats a fraction as a percentage string with one decimal place.
 *
 * @param v Fraction to format (e.g. 0.25).
 * @returns The value multiplied by 100, fixed to one decimal, followed by "%".
 */
function pct(v: number): string {
  const scaled = v * 100
  return scaled.toFixed(1) + '%'
}
|
||||
@@ -1,183 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { DeskewGroundTruth, DeskewResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { DeskewControls } from './DeskewControls'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props accepted by the StepDeskew component. */
interface StepDeskewProps {
|
||||
/** Id of the OCR pipeline session to deskew; when null the step renders a "no session selected" hint. */
sessionId: string | null
|
||||
/** Forwarded to DeskewControls as its onNext callback. */
onNext: () => void
|
||||
}
|
||||
|
||||
/**
 * Deskew step of the OCR pipeline.
 *
 * Loads the session, reuses a stored deskew result when the backend already
 * has one, and otherwise triggers the automatic deskew. The oriented image is
 * shown as the "before" view next to the deskewed (or binarized) image, with
 * controls for manual correction and ground-truth feedback.
 *
 * @param sessionId Pipeline session id, or null when none is selected.
 * @param onNext    Forwarded to DeskewControls to advance to the next step.
 */
export function StepDeskew({ sessionId, onNext }: StepDeskewProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
  const [deskewing, setDeskewing] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showBinarized, setShowBinarized] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)

  // Load session and auto-trigger deskew.
  // Keyed on sessionId only: the former `session` / `hasAutoRun` guards meant a
  // changed sessionId never reloaded. The cleanup flag stops responses of a
  // superseded session (or an unmounted component) from writing state.
  useEffect(() => {
    if (!sessionId) return

    let cancelled = false
    const sessionUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`

    const loadAndDeskew = async () => {
      // Reset everything that belonged to the previous session.
      setSession(null)
      setDeskewResult(null)
      setDeskewing(false)
      setError(null)

      try {
        const res = await fetch(sessionUrl)
        if (cancelled) return
        // A failed load used to return silently and leave an empty step.
        if (!res.ok) throw new Error(`Session konnte nicht geladen werden (${res.status})`)
        const data = await res.json()
        if (cancelled) return

        const sessionInfo: SessionInfo = {
          session_id: data.session_id,
          filename: data.filename,
          image_width: data.image_width,
          image_height: data.image_height,
          // Use oriented image as "before" view (deskew runs right after orientation)
          original_image_url: `${sessionUrl}/image/oriented`,
        }
        setSession(sessionInfo)

        // If deskew result already exists, use it
        if (data.deskew_result) {
          const dr: DeskewResult = {
            ...data.deskew_result,
            deskewed_image_url: `${sessionUrl}/image/deskewed`,
            binarized_image_url: `${sessionUrl}/image/binarized`,
          }
          setDeskewResult(dr)
          return
        }

        // Auto-trigger deskew if not already done
        setDeskewing(true)
        const deskewRes = await fetch(`${sessionUrl}/deskew`, {
          method: 'POST',
        })

        if (!deskewRes.ok) {
          throw new Error('Begradigung fehlgeschlagen')
        }

        const deskewData: DeskewResult = await deskewRes.json()
        if (cancelled) return
        // The URLs in the response are prefixed with the API base here.
        deskewData.deskewed_image_url = `${KLAUSUR_API}${deskewData.deskewed_image_url}`
        deskewData.binarized_image_url = `${KLAUSUR_API}${deskewData.binarized_image_url}`
        setDeskewResult(deskewData)
      } catch (e) {
        if (!cancelled) setError(e instanceof Error ? e.message : 'Fehler beim Laden')
      } finally {
        // Only the current run may clear the spinner.
        if (!cancelled) setDeskewing(false)
      }
    }

    loadAndDeskew()

    return () => {
      cancelled = true
    }
  }, [sessionId])

  // Apply a user-chosen rotation angle and refresh the deskewed image.
  const handleManualDeskew = useCallback(async (angle: number) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/deskew/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ angle }),
      })

      if (!res.ok) throw new Error('Manuelle Begradigung fehlgeschlagen')

      const data = await res.json()
      setDeskewResult((prev) =>
        prev
          ? {
              ...prev,
              angle_applied: data.angle_applied,
              method_used: data.method_used,
              // Timestamp query forces the browser to refetch the regenerated image.
              deskewed_image_url: `${KLAUSUR_API}${data.deskewed_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  // Persist ground-truth feedback; failures are logged but do not block the UI.
  const handleGroundTruth = useCallback(async (gt: DeskewGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/deskew`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch() resolves on HTTP errors too, so check the status explicitly.
      if (!res.ok) console.error('Ground truth save failed:', res.status)
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  return (
    <div className="space-y-4">
      {/* Filename */}
      {session && (
        <div className="text-sm text-gray-500 dark:text-gray-400">
          Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
          {' '}({session.image_width} x {session.image_height} px)
        </div>
      )}

      {/* Loading indicator */}
      {deskewing && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Begradigung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison */}
      {session && (
        <ImageCompareView
          originalUrl={session.original_image_url}
          deskewedUrl={deskewResult?.deskewed_image_url ?? null}
          showGrid={showGrid}
          showBinarized={showBinarized}
          binarizedUrl={deskewResult?.binarized_image_url ?? null}
          leftLabel="Orientiert"
          rightLabel="Begradigt"
        />
      )}

      {/* Controls */}
      <DeskewControls
        deskewResult={deskewResult}
        showBinarized={showBinarized}
        onToggleBinarized={() => setShowBinarized((v) => !v)}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDeskew={handleManualDeskew}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
@@ -1,204 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { DeskewResult, DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { DewarpControls } from './DewarpControls'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props accepted by the StepDewarp component. */
interface StepDewarpProps {
|
||||
/** Id of the OCR pipeline session to dewarp; when null the step renders a hint to finish step 1 first. */
sessionId: string | null
|
||||
/** Forwarded to DewarpControls as its onNext callback. */
onNext: () => void
|
||||
}
|
||||
|
||||
/**
 * Dewarp step of the OCR pipeline.
 *
 * Loads the session's stored deskew result (handed to DewarpControls),
 * triggers the automatic dewarp for the session, and shows the deskewed image
 * next to the dewarped one. Manual shear, combined rotation + shear and
 * ground-truth feedback are sent to the backend through the handlers below.
 *
 * @param sessionId Pipeline session id, or null when none is selected.
 * @param onNext    Forwarded to DewarpControls to advance to the next step.
 */
export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
  const [dewarpResult, setDewarpResult] = useState<DewarpResult | null>(null)
  const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
  const [dewarping, setDewarping] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)

  // Load session info to get deskew_result (for fine-tuning init values)
  useEffect(() => {
    if (!sessionId) return

    let cancelled = false

    const loadSession = async () => {
      // Do not keep the previous session's deskew values around.
      setDeskewResult(null)
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (cancelled || !res.ok) return
        const data = await res.json()
        if (cancelled) return
        if (data.deskew_result) {
          setDeskewResult(data.deskew_result)
        }
      } catch (e) {
        console.error('Failed to load session info:', e)
      }
    }
    loadSession()

    return () => {
      cancelled = true
    }
  }, [sessionId])

  // Auto-trigger dewarp for the current session.
  // Keyed on sessionId only: guarding on `dewarpResult` meant a changed
  // sessionId kept showing the previous session's image. The cleanup flag
  // discards responses that arrive for a superseded session.
  useEffect(() => {
    if (!sessionId) return

    let cancelled = false

    const runDewarp = async () => {
      setDewarpResult(null)
      setDewarping(true)
      setError(null)
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp`, {
          method: 'POST',
        })
        if (!res.ok) {
          // Prefer the backend's `detail` message; fall back to the status text.
          const err = await res.json().catch(() => ({ detail: res.statusText }))
          throw new Error(err.detail || 'Entzerrung fehlgeschlagen')
        }
        const data: DewarpResult = await res.json()
        if (cancelled) return
        data.dewarped_image_url = `${KLAUSUR_API}${data.dewarped_image_url}`
        setDewarpResult(data)
      } catch (e) {
        if (!cancelled) setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        // Only the current run may clear the spinner.
        if (!cancelled) setDewarping(false)
      }
    }

    runDewarp()

    return () => {
      cancelled = true
    }
  }, [sessionId])

  // Apply a user-chosen shear angle and refresh the dewarped image.
  const handleManualDewarp = useCallback(async (shearDegrees: number) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ shear_degrees: shearDegrees }),
      })
      if (!res.ok) throw new Error('Manuelle Entzerrung fehlgeschlagen')

      const data = await res.json()
      setDewarpResult((prev) =>
        prev
          ? {
              ...prev,
              method_used: data.method_used,
              shear_degrees: data.shear_degrees,
              // Timestamp query forces the browser to refetch the regenerated image.
              dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  // Apply rotation and shear in one request and refresh the dewarped image.
  const handleCombinedAdjust = useCallback(async (rotationDegrees: number, shearDegrees: number) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/adjust-combined`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ rotation_degrees: rotationDegrees, shear_degrees: shearDegrees }),
      })
      if (!res.ok) throw new Error('Kombinierte Anpassung fehlgeschlagen')

      const data = await res.json()
      setDewarpResult((prev) =>
        prev
          ? {
              ...prev,
              method_used: data.method_used,
              shear_degrees: data.shear_degrees,
              dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  // Persist ground-truth feedback; failures are logged but do not block the UI.
  const handleGroundTruth = useCallback(async (gt: DewarpGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/dewarp`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch() resolves on HTTP errors too, so check the status explicitly.
      if (!res.ok) console.error('Ground truth save failed:', res.status)
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">🔧</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 2: Entzerrung (Dewarp)
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 (Begradigung) abschliessen.
        </p>
      </div>
    )
  }

  const deskewedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/deskewed`
  const dewarpedUrl = dewarpResult?.dewarped_image_url ?? null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {dewarping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Entzerrung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison: deskewed (left) vs dewarped (right) */}
      <ImageCompareView
        originalUrl={deskewedUrl}
        deskewedUrl={dewarpedUrl}
        showGrid={showGrid}
        showGridLeft={showGrid}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel={`Begradigt (nach Deskew)${showGrid ? ' + Raster' : ''}`}
        rightLabel={`Entzerrt${showGrid ? ' + Raster (mm)' : ''}`}
      />

      {/* Controls */}
      <DewarpControls
        dewarpResult={dewarpResult}
        deskewResult={deskewResult}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDewarp={handleManualDewarp}
        onCombinedAdjust={handleCombinedAdjust}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
@@ -1,596 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type {
|
||||
GridCell, ColumnMeta, ImageRegion, ImageStyle,
|
||||
} from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { IMAGE_STYLES as STYLES } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
// Hex colour string for each column-type key.
// NOTE(review): presumably used to colour columns in the reconstruction view — usage is outside this excerpt, confirm.
const COL_TYPE_COLORS: Record<string, string> = {
|
||||
column_en: '#3b82f6',
|
||||
column_de: '#22c55e',
|
||||
column_example: '#f97316',
|
||||
column_text: '#a855f7',
|
||||
page_ref: '#06b6d4',
|
||||
column_marker: '#6b7280',
|
||||
}
|
||||
|
||||
/** Props accepted by the StepGroundTruth component. */
interface StepGroundTruthProps {
|
||||
/** Id of the OCR pipeline session; nothing is loaded or saved while it is null. */
sessionId: string | null
|
||||
/** Callback for advancing to the next step (its call site is outside this excerpt). */
onNext: () => void
|
||||
}
|
||||
|
||||
/** Subset of the session response that StepGroundTruth keeps in state. */
interface SessionData {
|
||||
/** Cells from the session's word_result (empty array when absent). */
cells: GridCell[]
|
||||
/** columns_used from the session's word_result (empty array when absent). */
columnsUsed: ColumnMeta[]
|
||||
/** Image width from word_result, else from the session, else 800. */
imageWidth: number
|
||||
/** Image height from word_result, else from the session, else 600. */
imageHeight: number
|
||||
/** API-prefixed URL of the original image; falls back to the session's /image/original endpoint. */
originalImageUrl: string
|
||||
}
|
||||
|
||||
export function StepGroundTruth({ sessionId, onNext }: StepGroundTruthProps) {
|
||||
const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
|
||||
const [error, setError] = useState('')
|
||||
const [session, setSession] = useState<SessionData | null>(null)
|
||||
const [imageRegions, setImageRegions] = useState<(ImageRegion & { generating?: boolean })[]>([])
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [zoom, setZoom] = useState(100)
|
||||
const [syncScroll, setSyncScroll] = useState(true)
|
||||
const [notes, setNotes] = useState('')
|
||||
const [score, setScore] = useState<number | null>(null)
|
||||
const [drawingRegion, setDrawingRegion] = useState(false)
|
||||
const [dragStart, setDragStart] = useState<{ x: number; y: number } | null>(null)
|
||||
const [dragEnd, setDragEnd] = useState<{ x: number; y: number } | null>(null)
|
||||
|
||||
const leftPanelRef = useRef<HTMLDivElement>(null)
|
||||
const rightPanelRef = useRef<HTMLDivElement>(null)
|
||||
const reconRef = useRef<HTMLDivElement>(null)
|
||||
const [reconWidth, setReconWidth] = useState(0)
|
||||
|
||||
// Track reconstruction container width for font size calculation
|
||||
useEffect(() => {
|
||||
const el = reconRef.current
|
||||
if (!el) return
|
||||
const obs = new ResizeObserver(entries => {
|
||||
for (const entry of entries) setReconWidth(entry.contentRect.width)
|
||||
})
|
||||
obs.observe(el)
|
||||
return () => obs.disconnect()
|
||||
}, [session])
|
||||
|
||||
// Load session data
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
loadSessionData()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
const loadSessionData = async () => {
|
||||
if (!sessionId) return
|
||||
setStatus('loading')
|
||||
try {
|
||||
const resp = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||
if (!resp.ok) throw new Error(`Failed to load session: ${resp.status}`)
|
||||
const data = await resp.json()
|
||||
|
||||
const wordResult = data.word_result || {}
|
||||
setSession({
|
||||
cells: wordResult.cells || [],
|
||||
columnsUsed: wordResult.columns_used || [],
|
||||
imageWidth: wordResult.image_width || data.image_width || 800,
|
||||
imageHeight: wordResult.image_height || data.image_height || 600,
|
||||
originalImageUrl: data.original_image_url
|
||||
? `${KLAUSUR_API}${data.original_image_url}`
|
||||
: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/original`,
|
||||
})
|
||||
|
||||
// Load existing validation data
|
||||
const valResp = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validation`)
|
||||
if (valResp.ok) {
|
||||
const valData = await valResp.json()
|
||||
const validation = valData.validation
|
||||
if (validation) {
|
||||
setImageRegions(validation.image_regions || [])
|
||||
setNotes(validation.notes || '')
|
||||
setScore(validation.score ?? null)
|
||||
}
|
||||
}
|
||||
|
||||
setStatus('ready')
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
setStatus('error')
|
||||
}
|
||||
}
|
||||
|
||||
// Sync scroll between panels
|
||||
const handleScroll = useCallback((source: 'left' | 'right') => {
|
||||
if (!syncScroll) return
|
||||
const from = source === 'left' ? leftPanelRef.current : rightPanelRef.current
|
||||
const to = source === 'left' ? rightPanelRef.current : leftPanelRef.current
|
||||
if (from && to) {
|
||||
to.scrollTop = from.scrollTop
|
||||
to.scrollLeft = from.scrollLeft
|
||||
}
|
||||
}, [syncScroll])
|
||||
|
||||
// Detect images via VLM
|
||||
const handleDetectImages = async () => {
|
||||
if (!sessionId) return
|
||||
setDetecting(true)
|
||||
try {
|
||||
const resp = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/detect-images`,
|
||||
{ method: 'POST' }
|
||||
)
|
||||
if (!resp.ok) throw new Error(`Detection failed: ${resp.status}`)
|
||||
const data = await resp.json()
|
||||
setImageRegions(data.regions || [])
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
} finally {
|
||||
setDetecting(false)
|
||||
}
|
||||
}
|
||||
|
||||
// Generate an image for the region currently at `index`.
//
// The region's prompt and style are posted to the backend together with the
// index; the returned base64 image is stored on the region. While the request
// is in flight the region carries `generating: true`.
//
// The region list can change while the request is pending (regions can be
// removed or re-detected), which shifts or invalidates array indices. The
// result is therefore written back to the region whose `bbox_pct` object is
// the one captured here: every state update in this component copies regions
// with a shallow spread or filters them, so that object reference stays the
// same for the lifetime of a region. If the region is gone by the time the
// response arrives, the result is dropped.
const handleGenerateImage = async (index: number) => {
  if (!sessionId) return
  const region = imageRegions[index]
  if (!region) return

  const regionKey = region.bbox_pct
  const isTarget = (candidate: { bbox_pct: unknown }) => candidate.bbox_pct === regionKey

  setImageRegions(prev => prev.map(r => (isTarget(r) ? { ...r, generating: true } : r)))

  try {
    const resp = await fetch(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/generate-image`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          region_index: index,
          prompt: region.prompt,
          style: region.style,
        }),
      }
    )
    if (!resp.ok) throw new Error(`Generation failed: ${resp.status}`)
    const data = await resp.json()

    setImageRegions(prev => prev.map(r =>
      isTarget(r) ? { ...r, image_b64: data.image_b64, generating: false } : r
    ))
  } catch (e) {
    // Clear the busy flag on the same region before surfacing the error.
    setImageRegions(prev => prev.map(r => (isTarget(r) ? { ...r, generating: false } : r)))
    setError(e instanceof Error ? e.message : String(e))
  }
}
|
||||
|
||||
// Persist the reviewer's notes and score for the current session.
//
// Resolves to `true` when the backend accepted the validation and to `false`
// otherwise (no session id, non-2xx response, or a network error). Failures
// are also reported through `setError`, and the status returns to 'ready' so
// the user can retry. Returning the outcome lets callers decide whether a
// follow-up action should run instead of assuming the save succeeded.
const handleSave = async (): Promise<boolean> => {
  if (!sessionId) {
    setError('Keine Session-ID vorhanden')
    return false
  }
  setStatus('saving')
  setError('')
  try {
    const resp = await fetch(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validate`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        // An unset score is transmitted as 0.
        body: JSON.stringify({ notes, score: score ?? 0 }),
      }
    )
    if (!resp.ok) {
      // Best effort: an unreadable error body must not hide the HTTP status.
      const body = await resp.text().catch(() => '')
      throw new Error(`Speichern fehlgeschlagen (${resp.status}): ${body}`)
    }
    setStatus('saved')
    return true
  } catch (e) {
    setError(e instanceof Error ? e.message : String(e))
    setStatus('ready')
    return false
  }
}
|
||||
|
||||
// Manual region drawing, step 1: when drawing mode is active, anchor a new
// drag at the pointer position. Coordinates are stored as percentages of the
// element that received the event, so they are independent of zoom.
const handleReconMouseDown = (e: React.MouseEvent<HTMLDivElement>) => {
  if (!drawingRegion) return

  const bounds = e.currentTarget.getBoundingClientRect()
  const xPct = ((e.clientX - bounds.left) / bounds.width) * 100
  const yPct = ((e.clientY - bounds.top) / bounds.height) * 100

  // Start and end coincide until the pointer moves.
  setDragStart({ x: xPct, y: yPct })
  setDragEnd({ x: xPct, y: yPct })
}
|
||||
|
||||
// Manual region drawing, step 2: while a drag is in progress, move its end
// point to the pointer position (percent of the element's width/height).
const handleReconMouseMove = (e: React.MouseEvent<HTMLDivElement>) => {
  if (!dragStart) return

  const bounds = e.currentTarget.getBoundingClientRect()
  const xPct = ((e.clientX - bounds.left) / bounds.width) * 100
  const yPct = ((e.clientY - bounds.top) / bounds.height) * 100
  setDragEnd({ x: xPct, y: yPct })
}
|
||||
|
||||
// Manual region drawing, step 3: finish the drag. The rectangle spanned by
// the drag start and end points is appended as a new, prompt-less region when
// it is larger than 2% in both dimensions; smaller drags are discarded.
// In every case the drag state is cleared and drawing mode is switched off.
const handleReconMouseUp = () => {
  if (!dragStart || !dragEnd) return

  const left = Math.min(dragStart.x, dragEnd.x)
  const top = Math.min(dragStart.y, dragEnd.y)
  const width = Math.abs(dragEnd.x - dragStart.x)
  const height = Math.abs(dragEnd.y - dragStart.y)

  const largeEnough = width > 2 && height > 2
  if (largeEnough) {
    setImageRegions(prev => [
      ...prev,
      {
        bbox_pct: { x: left, y: top, w: width, h: height },
        prompt: '',
        description: 'Manually selected region',
        image_b64: null,
        style: 'educational' as ImageStyle,
      },
    ])
  }

  setDragStart(null)
  setDragEnd(null)
  setDrawingRegion(false)
}
|
||||
|
||||
// Drop the region at the given list position; all other regions are kept in order.
const handleRemoveRegion = (index: number) => {
  setImageRegions(current =>
    current.filter((_region, position) => position !== index)
  )
}
|
||||
|
||||
if (status === 'loading') {
|
||||
return (
|
||||
<div className="flex items-center justify-center py-16">
|
||||
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-teal-500 mr-3" />
|
||||
<span className="text-gray-500 dark:text-gray-400">Session wird geladen...</span>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (status === 'error' && !session) {
|
||||
return (
|
||||
<div className="text-center py-16">
|
||||
<p className="text-red-500">{error}</p>
|
||||
<button onClick={loadSessionData} className="mt-4 px-4 py-2 bg-teal-600 text-white rounded hover:bg-teal-700">
|
||||
Erneut laden
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (!session) return null
|
||||
|
||||
const aspect = session.imageHeight / session.imageWidth
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Header / Controls */}
|
||||
<div className="flex items-center justify-between flex-wrap gap-2">
|
||||
<h3 className="text-lg font-medium text-gray-800 dark:text-gray-200">
|
||||
Validierung — Original vs. Rekonstruktion
|
||||
</h3>
|
||||
<div className="flex items-center gap-3">
|
||||
<button
|
||||
onClick={handleDetectImages}
|
||||
disabled={detecting}
|
||||
className="px-3 py-1.5 text-sm bg-indigo-600 text-white rounded hover:bg-indigo-700 disabled:opacity-50"
|
||||
>
|
||||
{detecting ? 'Erkennung laeuft...' : 'Bilder erkennen'}
|
||||
</button>
|
||||
<label className="flex items-center gap-1.5 text-sm text-gray-600 dark:text-gray-400">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={syncScroll}
|
||||
onChange={e => setSyncScroll(e.target.checked)}
|
||||
className="rounded"
|
||||
/>
|
||||
Sync Scroll
|
||||
</label>
|
||||
<div className="flex items-center gap-1.5">
|
||||
<button onClick={() => setZoom(z => Math.max(50, z - 25))} className="px-2 py-1 text-sm border rounded dark:border-gray-600 hover:bg-gray-100 dark:hover:bg-gray-700">-</button>
|
||||
<span className="text-sm text-gray-600 dark:text-gray-400 w-12 text-center">{zoom}%</span>
|
||||
<button onClick={() => setZoom(z => Math.min(200, z + 25))} className="px-2 py-1 text-sm border rounded dark:border-gray-600 hover:bg-gray-100 dark:hover:bg-gray-700">+</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div className="p-2 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 text-sm rounded">
|
||||
{error}
|
||||
<button onClick={() => setError('')} className="ml-2 underline">Schliessen</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Side-by-side panels */}
|
||||
<div className="grid grid-cols-2 gap-4" style={{ height: 'calc(100vh - 580px)', minHeight: 300 }}>
|
||||
{/* Left: Original */}
|
||||
<div className="border rounded-lg dark:border-gray-700 overflow-hidden flex flex-col">
|
||||
<div className="px-3 py-1.5 bg-gray-50 dark:bg-gray-800 text-sm font-medium text-gray-600 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
Original
|
||||
</div>
|
||||
<div
|
||||
ref={leftPanelRef}
|
||||
className="flex-1 overflow-auto"
|
||||
onScroll={() => handleScroll('left')}
|
||||
>
|
||||
<div style={{ width: `${zoom}%`, minWidth: '100%' }}>
|
||||
<img
|
||||
src={session.originalImageUrl}
|
||||
alt="Original"
|
||||
className="w-full h-auto"
|
||||
draggable={false}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right: Reconstruction */}
|
||||
<div className="border rounded-lg dark:border-gray-700 overflow-hidden flex flex-col">
|
||||
<div className="px-3 py-1.5 bg-gray-50 dark:bg-gray-800 text-sm font-medium text-gray-600 dark:text-gray-400 border-b dark:border-gray-700 flex items-center justify-between">
|
||||
<span>Rekonstruktion</span>
|
||||
<button
|
||||
onClick={() => setDrawingRegion(!drawingRegion)}
|
||||
className={`text-xs px-2 py-0.5 rounded ${drawingRegion ? 'bg-indigo-600 text-white' : 'bg-gray-200 dark:bg-gray-700 text-gray-600 dark:text-gray-400'}`}
|
||||
>
|
||||
{drawingRegion ? 'Region zeichnen...' : '+ Region'}
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
ref={rightPanelRef}
|
||||
className="flex-1 overflow-auto"
|
||||
onScroll={() => handleScroll('right')}
|
||||
>
|
||||
<div style={{ width: `${zoom}%`, minWidth: '100%' }}>
|
||||
{/* Reconstruction container */}
|
||||
<div
|
||||
ref={reconRef}
|
||||
className="relative bg-white"
|
||||
style={{
|
||||
paddingBottom: `${aspect * 100}%`,
|
||||
cursor: drawingRegion ? 'crosshair' : 'default',
|
||||
}}
|
||||
onMouseDown={handleReconMouseDown}
|
||||
onMouseMove={handleReconMouseMove}
|
||||
onMouseUp={handleReconMouseUp}
|
||||
>
|
||||
{/* Row separator lines — derive from cells */}
|
||||
{(() => {
|
||||
const rowYs = new Set<number>()
|
||||
for (const cell of session.cells) {
|
||||
if (cell.col_index === 0 && cell.bbox_pct) {
|
||||
rowYs.add(cell.bbox_pct.y)
|
||||
}
|
||||
}
|
||||
return Array.from(rowYs).map((y, i) => (
|
||||
<div
|
||||
key={`row-${i}`}
|
||||
className="absolute left-0 right-0"
|
||||
style={{
|
||||
top: `${y}%`,
|
||||
height: '1px',
|
||||
backgroundColor: 'rgba(0,0,0,0.06)',
|
||||
}}
|
||||
/>
|
||||
))
|
||||
})()}
|
||||
|
||||
{/* Cell texts — black on white, font size derived from cell height */}
|
||||
{session.cells.map(cell => {
|
||||
if (!cell.bbox_pct || !cell.text) return null
|
||||
// Container height in px = reconWidth * aspect
|
||||
// Cell height in px = containerHeightPx * (bbox_pct.h / 100)
|
||||
// Font size ≈ 70% of cell height
|
||||
const containerH = reconWidth * aspect
|
||||
const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
|
||||
const fontSize = Math.max(6, cellHeightPx * 0.7)
|
||||
return (
|
||||
<span
|
||||
key={cell.cell_id}
|
||||
className="absolute leading-none overflow-hidden whitespace-nowrap"
|
||||
style={{
|
||||
left: `${cell.bbox_pct.x}%`,
|
||||
top: `${cell.bbox_pct.y}%`,
|
||||
width: `${cell.bbox_pct.w}%`,
|
||||
height: `${cell.bbox_pct.h}%`,
|
||||
color: '#1a1a1a',
|
||||
fontSize: `${fontSize}px`,
|
||||
fontWeight: cell.is_bold ? 'bold' : 'normal',
|
||||
fontFamily: "'Liberation Sans', 'DejaVu Sans', Arial, sans-serif",
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
padding: '0 1px',
|
||||
}}
|
||||
title={`${cell.cell_id}: ${cell.text}`}
|
||||
>
|
||||
{cell.text}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
|
||||
{/* Generated images at region positions */}
|
||||
{imageRegions.map((region, i) => (
|
||||
<div
|
||||
key={`region-${i}`}
|
||||
className="absolute border-2 border-dashed border-indigo-400"
|
||||
style={{
|
||||
left: `${region.bbox_pct.x}%`,
|
||||
top: `${region.bbox_pct.y}%`,
|
||||
width: `${region.bbox_pct.w}%`,
|
||||
height: `${region.bbox_pct.h}%`,
|
||||
}}
|
||||
>
|
||||
{region.image_b64 ? (
|
||||
<img src={region.image_b64} alt={region.description} className="w-full h-full object-cover" />
|
||||
) : (
|
||||
<div className="w-full h-full flex items-center justify-center bg-indigo-50/50 text-indigo-400 text-[0.5em]">
|
||||
{region.generating ? '...' : `Bild ${i + 1}`}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
|
||||
{/* Drawing rectangle */}
|
||||
{dragStart && dragEnd && (
|
||||
<div
|
||||
className="absolute border-2 border-dashed border-red-500 bg-red-100/20 pointer-events-none"
|
||||
style={{
|
||||
left: `${Math.min(dragStart.x, dragEnd.x)}%`,
|
||||
top: `${Math.min(dragStart.y, dragEnd.y)}%`,
|
||||
width: `${Math.abs(dragEnd.x - dragStart.x)}%`,
|
||||
height: `${Math.abs(dragEnd.y - dragStart.y)}%`,
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Image regions panel */}
|
||||
{imageRegions.length > 0 && (
|
||||
<div className="border rounded-lg dark:border-gray-700 p-4">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-3">
|
||||
Bildbereiche ({imageRegions.length} gefunden)
|
||||
</h4>
|
||||
<div className="space-y-3">
|
||||
{imageRegions.map((region, i) => (
|
||||
<div key={i} className="flex items-start gap-3 p-3 bg-gray-50 dark:bg-gray-800 rounded-lg">
|
||||
{/* Preview thumbnail */}
|
||||
<div className="w-16 h-16 flex-shrink-0 border rounded dark:border-gray-600 overflow-hidden bg-white">
|
||||
{region.image_b64 ? (
|
||||
<img src={region.image_b64} alt="" className="w-full h-full object-cover" />
|
||||
) : (
|
||||
<div className="w-full h-full flex items-center justify-center text-gray-400 text-xs">
|
||||
{Math.round(region.bbox_pct.w)}x{Math.round(region.bbox_pct.h)}%
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Prompt + controls */}
|
||||
<div className="flex-1 min-w-0 space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-xs text-gray-500 dark:text-gray-400 flex-shrink-0">
|
||||
Bereich {i + 1}:
|
||||
</span>
|
||||
<input
|
||||
type="text"
|
||||
value={region.prompt}
|
||||
onChange={e => {
|
||||
setImageRegions(prev => prev.map((r, j) =>
|
||||
j === i ? { ...r, prompt: e.target.value } : r
|
||||
))
|
||||
}}
|
||||
placeholder="Beschreibung / Prompt..."
|
||||
className="flex-1 text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<select
|
||||
value={region.style}
|
||||
onChange={e => {
|
||||
setImageRegions(prev => prev.map((r, j) =>
|
||||
j === i ? { ...r, style: e.target.value as ImageStyle } : r
|
||||
))
|
||||
}}
|
||||
className="text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
|
||||
>
|
||||
{STYLES.map(s => (
|
||||
<option key={s.value} value={s.value}>{s.label}</option>
|
||||
))}
|
||||
</select>
|
||||
<button
|
||||
onClick={() => handleGenerateImage(i)}
|
||||
disabled={!!region.generating || !region.prompt}
|
||||
className="px-3 py-1 text-sm bg-teal-600 text-white rounded hover:bg-teal-700 disabled:opacity-50"
|
||||
>
|
||||
{region.generating ? 'Generiere...' : 'Generieren'}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleRemoveRegion(i)}
|
||||
className="px-2 py-1 text-sm text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded"
|
||||
>
|
||||
Entfernen
|
||||
</button>
|
||||
</div>
|
||||
{region.description && region.description !== region.prompt && (
|
||||
<p className="text-xs text-gray-400">{region.description}</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Notes and score */}
|
||||
<div className="border rounded-lg dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center gap-4">
|
||||
<label className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Bewertung (1-10):
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
min={1}
|
||||
max={10}
|
||||
value={score ?? ''}
|
||||
onChange={e => setScore(e.target.value ? parseInt(e.target.value) : null)}
|
||||
className="w-20 text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
|
||||
/>
|
||||
<div className="flex gap-1">
|
||||
{[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(v => (
|
||||
<button
|
||||
key={v}
|
||||
onClick={() => setScore(v)}
|
||||
className={`w-7 h-7 text-xs rounded ${score === v ? 'bg-teal-600 text-white' : 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-200 dark:hover:bg-gray-600'}`}
|
||||
>
|
||||
{v}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-sm font-medium text-gray-700 dark:text-gray-300 block mb-1">
|
||||
Notizen:
|
||||
</label>
|
||||
<textarea
|
||||
value={notes}
|
||||
onChange={e => setNotes(e.target.value)}
|
||||
rows={3}
|
||||
placeholder="Anmerkungen zur Qualitaet der Rekonstruktion..."
|
||||
className="w-full text-sm px-3 py-2 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Actions — sticky bottom bar */}
|
||||
<div className="sticky bottom-0 bg-white dark:bg-gray-900 border-t dark:border-gray-700 py-3 px-1 -mx-1 flex items-center justify-between">
|
||||
<div className="text-sm text-gray-500 dark:text-gray-400">
|
||||
{status === 'saved' && <span className="text-green-600 dark:text-green-400">Validierung gespeichert</span>}
|
||||
{status === 'saving' && <span>Speichere...</span>}
|
||||
</div>
|
||||
<div className="flex items-center gap-3">
|
||||
<button
|
||||
onClick={handleSave}
|
||||
disabled={status === 'saving'}
|
||||
className="px-4 py-2 text-sm bg-gray-600 text-white rounded hover:bg-gray-700 disabled:opacity-50"
|
||||
>
|
||||
Speichern
|
||||
</button>
|
||||
<button
|
||||
onClick={async () => {
|
||||
await handleSave()
|
||||
onNext()
|
||||
}}
|
||||
disabled={status === 'saving'}
|
||||
className="px-4 py-2 text-sm bg-teal-600 text-white rounded hover:bg-teal-700 disabled:opacity-50"
|
||||
>
|
||||
Abschliessen
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,922 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { usePixelWordPositions } from './usePixelWordPositions'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** One field-level correction reported by the LLM review. */
interface LlmChange {
  /** Row the correction belongs to; used as the key when grouping changes per row. */
  row_index: number
  /** Which vocabulary field the correction applies to. */
  field: 'english' | 'german' | 'example'
  /** Text before the correction. */
  old: string
  /** Text proposed by the review. */
  new: string
}
|
||||
|
||||
/** Props of the LLM review step. */
interface StepLlmReviewProps {
  /** Session to review; while null the step only asks the user to pick a session. */
  sessionId: string | null
  /** Invoked when the user continues to (or skips to) the next step. */
  onNext: () => void
}
|
||||
|
||||
/** Summary of a review run, taken from the stream's `meta` event or rebuilt from a stored review. */
interface ReviewMeta {
  /** Number of vocabulary entries in the session. */
  total_entries: number
  /** Number of entries submitted for review. */
  to_review: number
  /** Number of entries left out of the review. */
  skipped: number
  /** Name of the model that performed the review. */
  model: string
  /** Row indices of the skipped entries, when the backend reports them. */
  skipped_indices?: number[]
}
|
||||
|
||||
/** Progress counter carried by each `batch` event of the review stream; shown as `current/total`. */
interface StreamProgress {
  current: number
  total: number
}
|
||||
|
||||
/** Short display label for each vocabulary field name. */
const FIELD_LABELS: Record<string, string> = {
  english: 'EN',
  german: 'DE',
  example: 'Beispiel',
  source_page: 'Seite',
  marker: 'Marker',
  text: 'Text',
}
|
||||
|
||||
/**
 * Translates a grid column type into the name of the WordEntry field that
 * holds that column's text. Column types without an entry have no
 * corresponding vocabulary field.
 */
const COL_TYPE_TO_FIELD: Record<string, string> = {
  column_en: 'english',
  column_de: 'german',
  column_example: 'example',
  page_ref: 'source_page',
  column_marker: 'marker',
  column_text: 'text',
}
|
||||
|
||||
/** Tailwind text-color classes (light and dark variant) per grid column type. */
const COL_TYPE_COLOR: Record<string, string> = {
  column_en: 'text-blue-600 dark:text-blue-400',
  column_de: 'text-green-600 dark:text-green-400',
  column_example: 'text-orange-600 dark:text-orange-400',
  page_ref: 'text-cyan-600 dark:text-cyan-400',
  column_marker: 'text-gray-500 dark:text-gray-400',
  column_text: 'text-gray-700 dark:text-gray-300',
}
|
||||
|
||||
/** Review state of a single vocabulary row. */
type RowStatus =
  | 'pending'
  | 'active'
  | 'reviewed'
  | 'corrected'
  | 'skipped'
|
||||
|
||||
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
||||
// Core state
|
||||
const [status, setStatus] = useState<'idle' | 'loading' | 'ready' | 'running' | 'done' | 'error' | 'applied'>('idle')
|
||||
const [meta, setMeta] = useState<ReviewMeta | null>(null)
|
||||
const [changes, setChanges] = useState<LlmChange[]>([])
|
||||
const [progress, setProgress] = useState<StreamProgress | null>(null)
|
||||
const [totalDuration, setTotalDuration] = useState(0)
|
||||
const [error, setError] = useState('')
|
||||
const [accepted, setAccepted] = useState<Set<number>>(new Set())
|
||||
const [applying, setApplying] = useState(false)
|
||||
|
||||
// Full vocab table state
|
||||
const [vocabEntries, setVocabEntries] = useState<WordEntry[]>([])
|
||||
const [columnsUsed, setColumnsUsed] = useState<ColumnMeta[]>([])
|
||||
const [activeRowIndices, setActiveRowIndices] = useState<Set<number>>(new Set())
|
||||
const [reviewedRows, setReviewedRows] = useState<Set<number>>(new Set())
|
||||
const [skippedRows, setSkippedRows] = useState<Set<number>>(new Set())
|
||||
const [correctedMap, setCorrectedMap] = useState<Map<number, LlmChange[]>>(new Map())
|
||||
|
||||
// Image
|
||||
const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
|
||||
|
||||
// Overlay view state
|
||||
const [viewMode, setViewMode] = useState<'table' | 'overlay'>('table')
|
||||
const [fontScale, setFontScale] = useState(0.7)
|
||||
const [leftPaddingPct, setLeftPaddingPct] = useState(0)
|
||||
const [globalBold, setGlobalBold] = useState(false)
|
||||
const [cells, setCells] = useState<GridCell[]>([])
|
||||
const reconRef = useRef<HTMLDivElement>(null)
|
||||
const [reconWidth, setReconWidth] = useState(0)
|
||||
|
||||
// Pixel-analysed word positions via shared hook
|
||||
const overlayImageUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
const cellWordPositions = usePixelWordPositions(overlayImageUrl, cells, viewMode === 'overlay')
|
||||
|
||||
const tableRef = useRef<HTMLDivElement>(null)
|
||||
const activeRowRef = useRef<HTMLTableRowElement>(null)
|
||||
|
||||
// Track reconstruction container width for font size calculation
|
||||
useEffect(() => {
|
||||
const el = reconRef.current
|
||||
if (!el) return
|
||||
const obs = new ResizeObserver(entries => {
|
||||
for (const entry of entries) setReconWidth(entry.contentRect.width)
|
||||
})
|
||||
obs.observe(el)
|
||||
return () => obs.disconnect()
|
||||
}, [viewMode])
|
||||
|
||||
// Load session data on mount
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
loadSessionData()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
// Fetch the session and initialise the step from its word-recognition result.
//
// - Without a word result the step enters the error state with a hint to
//   finish step 5 first.
// - If the result already contains an LLM review, its changes are restored,
//   every row is marked as reviewed, all changes are pre-accepted and the step
//   goes straight to 'done'.
// - Otherwise the step becomes 'ready' so a review can be started.
const loadSessionData = async () => {
  if (!sessionId) return
  setStatus('loading')
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
    if (!response.ok) throw new Error(`HTTP ${response.status}`)
    const sessionPayload = await response.json()

    const wordResult: GridResult | undefined = sessionPayload.word_result
    if (!wordResult) {
      setError('Keine Worterkennungsdaten gefunden. Bitte zuerst Schritt 5 abschliessen.')
      setStatus('error')
      return
    }

    const entries = wordResult.vocab_entries || wordResult.entries || []
    setVocabEntries(entries)
    setColumnsUsed(wordResult.columns_used || [])
    setCells(wordResult.cells || [])

    // No stored review yet: wait for the user to start one.
    const llmReview = wordResult.llm_review
    if (!llmReview || !llmReview.changes) {
      setStatus('ready')
      return
    }

    const storedChanges: LlmChange[] = llmReview.changes as LlmChange[]
    setChanges(storedChanges)
    setTotalDuration(llmReview.duration_ms || 0)

    // A stored review covers every row.
    setReviewedRows(new Set(entries.map((_: WordEntry, idx: number) => idx)))

    // Group the stored changes by the row they belong to.
    const changesByRow = new Map<number, LlmChange[]>()
    for (const change of storedChanges) {
      const bucket = changesByRow.get(change.row_index)
      if (bucket) {
        bucket.push(change)
      } else {
        changesByRow.set(change.row_index, [change])
      }
    }
    setCorrectedMap(changesByRow)

    // Every stored change starts out accepted.
    setAccepted(new Set(storedChanges.map((_: LlmChange, idx: number) => idx)))

    setMeta({
      total_entries: entries.length,
      to_review: entries.length,
      skipped: 0,
      model: llmReview.model_used || 'unknown',
    })
    setStatus('done')
  } catch (err: unknown) {
    setError(err instanceof Error ? err.message : String(err))
    setStatus('error')
  }
}
|
||||
|
||||
// Run the LLM review for the current session and consume its event stream.
//
// The endpoint answers with server-sent-event style frames: blocks separated
// by a blank line, each carrying a `data: <json>` payload. Handled event types:
//   - meta:     run summary; also marks the rows the backend skipped
//   - batch:    corrections for a group of rows plus a progress counter
//   - complete: end of the run; all changes are pre-accepted
//   - error:    aborts the run with the reported detail
//
// If the stream closes without a `complete` event, the run is still finalised
// with whatever was received, so the step cannot remain stuck in 'running'.
// The stream reader is cancelled on every exit path, so an aborted run does
// not keep the response open.
const runReview = useCallback(async () => {
  if (!sessionId) return
  setStatus('running')
  setError('')
  setChanges([])
  setProgress(null)
  setMeta(null)
  setTotalDuration(0)
  setActiveRowIndices(new Set())
  setReviewedRows(new Set())
  setSkippedRows(new Set())
  setCorrectedMap(new Map())

  try {
    const res = await fetch(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review?stream=true`,
      { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({}) },
    )

    if (!res.ok) {
      const data = await res.json().catch(() => ({}))
      throw new Error(data.detail || `HTTP ${res.status}`)
    }

    const reader = res.body!.getReader()
    const decoder = new TextDecoder()
    let buffer = ''
    let allChanges: LlmChange[] = []
    const allReviewed = new Set<number>()
    let allSkipped = new Set<number>()
    const cMap = new Map<number, LlmChange[]>()
    let completed = false

    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        buffer += decoder.decode(value, { stream: true })

        // Process every complete frame currently in the buffer; a trailing
        // partial frame stays buffered until the next read.
        while (buffer.includes('\n\n')) {
          const idx = buffer.indexOf('\n\n')
          const chunk = buffer.slice(0, idx).trim()
          buffer = buffer.slice(idx + 2)

          if (!chunk.startsWith('data: ')) continue
          const dataStr = chunk.slice(6)

          let event: any
          // Frames that are not valid JSON are ignored.
          try { event = JSON.parse(dataStr) } catch { continue }

          if (event.type === 'meta') {
            setMeta({
              total_entries: event.total_entries,
              to_review: event.to_review,
              skipped: event.skipped,
              model: event.model,
              skipped_indices: event.skipped_indices,
            })
            // Mark skipped rows
            if (event.skipped_indices) {
              allSkipped = new Set(event.skipped_indices)
              setSkippedRows(allSkipped)
            }
          }

          if (event.type === 'batch') {
            const batchChanges: LlmChange[] = event.changes || []
            const batchRows: number[] = event.entries_reviewed || []

            // Rows of the batch that just arrived are highlighted as active.
            setActiveRowIndices(new Set(batchRows))

            // Accumulate changes
            allChanges = [...allChanges, ...batchChanges]
            setChanges(allChanges)
            setProgress(event.progress)

            // Group changes per row; fresh array/map copies are handed to
            // state so React sees new references.
            for (const c of batchChanges) {
              const existing = cMap.get(c.row_index) || []
              existing.push(c)
              cMap.set(c.row_index, [...existing])
            }
            setCorrectedMap(new Map(cMap))

            // Mark batch rows as reviewed
            for (const r of batchRows) {
              allReviewed.add(r)
            }
            setReviewedRows(new Set(allReviewed))

            // Scroll to the active row once the table had a chance to re-render.
            setTimeout(() => {
              activeRowRef.current?.scrollIntoView({ behavior: 'smooth', block: 'center' })
            }, 50)
          }

          if (event.type === 'complete') {
            completed = true
            setActiveRowIndices(new Set())
            setTotalDuration(event.duration_ms)
            setAccepted(new Set(allChanges.map((_: LlmChange, i: number) => i)))
            // Mark all non-skipped as reviewed
            const allEntryIndices = vocabEntries.map((_: WordEntry, i: number) => i)
            for (const i of allEntryIndices) {
              if (!allSkipped.has(i)) allReviewed.add(i)
            }
            setReviewedRows(new Set(allReviewed))
            setStatus('done')
          }

          if (event.type === 'error') {
            throw new Error(event.detail || 'Unbekannter Fehler')
          }
        }
      }
    } finally {
      // Release the response on every exit path. Cancelling an already
      // finished stream is harmless; a rejection from an errored stream is
      // irrelevant here because the original error is already propagating.
      void reader.cancel().catch(() => {})
    }

    // Stream ended without a `complete` event: finalise with what was
    // received instead of leaving the step in 'running'.
    if (!completed) {
      setActiveRowIndices(new Set())
      setAccepted(new Set(allChanges.map((_: LlmChange, i: number) => i)))
      setStatus('done')
    }
  } catch (e: unknown) {
    const msg = e instanceof Error ? e.message : String(e)
    setError(msg)
    setStatus('error')
  }
}, [sessionId, vocabEntries])
|
||||
|
||||
// Flip the accepted state of the change at `index`.
const toggleChange = (index: number) => {
  setAccepted(current => {
    const updated = new Set(current)
    // Set.delete reports whether the index was present; add it only if it was not.
    if (!updated.delete(index)) {
      updated.add(index)
    }
    return updated
  })
}
|
||||
|
||||
// Accept every change, or clear the selection when the number of accepted
// changes already equals the number of changes.
const toggleAll = () => {
  const everythingAccepted = accepted.size === changes.length
  if (everythingAccepted) {
    setAccepted(new Set<number>())
    return
  }
  setAccepted(new Set(changes.map((_: LlmChange, position: number) => position)))
}
|
||||
|
||||
// Send the indices of the accepted changes to the backend. On success the
// step switches to 'applied'; on failure the error message is shown and the
// status is left untouched. The `applying` flag is raised for the duration of
// the request.
const applyChanges = useCallback(async () => {
  if (!sessionId) return

  setApplying(true)
  try {
    const endpoint = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review/apply`
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ accepted_indices: [...accepted] }),
    })
    if (!response.ok) {
      // Prefer the backend's `detail` message; fall back to the HTTP status
      // when the body is not JSON or carries no detail.
      const problem = await response.json().catch(() => ({}))
      throw new Error(problem.detail || `HTTP ${response.status}`)
    }
    setStatus('applied')
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err)
    setError(message)
  } finally {
    setApplying(false)
  }
}, [sessionId, accepted])
|
||||
|
||||
// Resolve the display status of a row. A row can be a member of several
// collections at once; the first match in the order
// active > skipped > corrected > reviewed wins, otherwise the row is pending.
const getRowStatus = (rowIndex: number): RowStatus => {
  const precedence: Array<[{ has(key: number): boolean }, RowStatus]> = [
    [activeRowIndices, 'active'],
    [skippedRows, 'skipped'],
    [correctedMap, 'corrected'],
    [reviewedRows, 'reviewed'],
  ]
  const match = precedence.find(([members]) => members.has(rowIndex))
  return match ? match[1] : 'pending'
}
|
||||
|
||||
const dewarpedUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
|
||||
// Per-column snap positions so that cells of one column line up vertically.
// For every col_index the x offsets and widths of all cells that have a
// bounding box are collected, and the middle element of each sorted list is
// used (for an even count that is the upper of the two middle values).
const colPositions = useMemo(() => {
  const samples = new Map<number, { xs: number[]; ws: number[] }>()
  cells.forEach(cell => {
    const box = cell.bbox_pct
    if (!box) return
    let bucket = samples.get(cell.col_index)
    if (!bucket) {
      bucket = { xs: [], ws: [] }
      samples.set(cell.col_index, bucket)
    }
    bucket.xs.push(box.x)
    bucket.ws.push(box.w)
  })

  const ascending = (a: number, b: number) => a - b
  const middleOf = (sorted: number[]) => sorted[Math.floor(sorted.length / 2)]

  const snapped = new Map<number, { x: number; w: number }>()
  samples.forEach((bucket, colIdx) => {
    bucket.xs.sort(ascending)
    bucket.ws.sort(ascending)
    snapped.set(colIdx, { x: middleOf(bucket.xs), w: middleOf(bucket.ws) })
  })
  return snapped
}, [cells])
|
||||
|
||||
if (!sessionId) {
|
||||
return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
|
||||
}
|
||||
|
||||
// --- Loading session data ---
|
||||
if (status === 'loading' || status === 'idle') {
|
||||
return (
|
||||
<div className="flex items-center gap-3 justify-center py-12">
|
||||
<div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
|
||||
<span className="text-gray-500">Session-Daten werden geladen...</span>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Error ---
|
||||
if (status === 'error') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">⚠️</div>
|
||||
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler bei OCR-Zeichenkorrektur</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
|
||||
<div className="flex gap-3">
|
||||
<button onClick={() => { setError(''); loadSessionData() }}
|
||||
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
|
||||
Erneut versuchen
|
||||
</button>
|
||||
<button onClick={onNext}
|
||||
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
|
||||
Ueberspringen →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Applied ---
|
||||
if (status === 'applied') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">✅</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturen uebernommen</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
|
||||
{accepted.size} von {changes.length} Korrekturen wurden angewendet.
|
||||
</p>
|
||||
<button onClick={onNext}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||
Weiter →
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// Active entry for highlighting on image
|
||||
const activeEntry = vocabEntries.find((_: WordEntry, i: number) => activeRowIndices.has(i))
|
||||
|
||||
// Completed share of the streamed review in whole percent. Stays 0 until
// progress is known and while the reported total is 0, so a zero divisor can
// never produce NaN or Infinity.
const pct = progress && progress.total > 0
  ? Math.round((progress.current / progress.total) * 100)
  : 0
|
||||
|
||||
/**
 * Apply an inline edit made in the overlay.
 *
 * The edited cell's text is replaced. If the cell's column type maps to a
 * vocabulary field, the same text is also written to that field of the entry
 * at `rowIndex`. A `null` text means "no edit" and is ignored.
 */
const handleCellEdit = (cellId: string, rowIndex: number, newText: string | null) => {
  if (newText === null) return

  setCells(current =>
    current.map(candidate => (candidate.cell_id === cellId ? { ...candidate, text: newText } : candidate))
  )

  // Mirror the edit into the vocabulary entry when the column has a known field.
  const editedCell = cells.find(candidate => candidate.cell_id === cellId)
  const targetField = editedCell ? COL_TYPE_TO_FIELD[editedCell.col_type] : undefined
  if (!targetField) return

  setVocabEntries(current =>
    current.map((entry, position) =>
      position === rowIndex ? { ...entry, [targetField]: newText } : entry
    )
  )
}
|
||||
|
||||
// --- Ready / Running / Done: 2-column layout ---
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h3 className="text-base font-medium text-gray-700 dark:text-gray-300">
|
||||
Schritt 6: Korrektur
|
||||
</h3>
|
||||
<p className="text-xs text-gray-400 mt-0.5">
|
||||
{status === 'ready' && `${vocabEntries.length} Eintraege bereit zur Pruefung`}
|
||||
{status === 'running' && meta && `${meta.model} · ${meta.to_review} zu pruefen, ${meta.skipped} uebersprungen`}
|
||||
{status === 'done' && (
|
||||
<>
|
||||
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden
|
||||
{meta && <> · {meta.skipped} uebersprungen</>}
|
||||
{' '}· {totalDuration}ms · {meta?.model}
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{status === 'ready' && (
|
||||
<button onClick={runReview}
|
||||
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium">
|
||||
Korrektur starten
|
||||
</button>
|
||||
)}
|
||||
{status === 'running' && (
|
||||
<div className="flex items-center gap-2 text-sm text-teal-600 dark:text-teal-400">
|
||||
<div className="animate-spin rounded-full h-4 w-4 border-b-2 border-teal-500" />
|
||||
{progress ? `${progress.current}/${progress.total}` : 'Startet...'}
|
||||
</div>
|
||||
)}
|
||||
{status === 'done' && changes.length > 0 && (
|
||||
<button onClick={toggleAll}
|
||||
className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
|
||||
{accepted.size === changes.length ? 'Keine' : 'Alle'} auswaehlen
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Progress bar (while running) */}
|
||||
{status === 'running' && progress && (
|
||||
<div className="space-y-1">
|
||||
<div className="flex justify-between text-xs text-gray-400">
|
||||
<span>{progress.current} / {progress.total} Eintraege geprueft</span>
|
||||
<span>{pct}%</span>
|
||||
</div>
|
||||
<div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-2">
|
||||
<div className="bg-teal-500 h-2 rounded-full transition-all duration-500" style={{ width: `${pct}%` }} />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* View mode toggle */}
|
||||
<div className="flex items-center gap-1">
|
||||
<button
|
||||
onClick={() => setViewMode('table')}
|
||||
className={`px-3 py-1.5 text-xs rounded-l-lg border transition-colors ${
|
||||
viewMode === 'table'
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
|
||||
}`}
|
||||
>
|
||||
Tabelle
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setViewMode('overlay')}
|
||||
className={`px-3 py-1.5 text-xs rounded-r-lg border transition-colors ${
|
||||
viewMode === 'overlay'
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
|
||||
}`}
|
||||
>
|
||||
Overlay
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Overlay toolbar */}
|
||||
{viewMode === 'overlay' && (
|
||||
<div className="flex items-center gap-4 flex-wrap bg-gray-50 dark:bg-gray-800/50 rounded-lg px-3 py-2">
|
||||
<label className="flex items-center gap-2 text-xs text-gray-600 dark:text-gray-400">
|
||||
Schrift
|
||||
<input
|
||||
type="range" min={30} max={120} value={Math.round(fontScale * 100)}
|
||||
onChange={e => setFontScale(Number(e.target.value) / 100)}
|
||||
className="w-24 h-1 accent-teal-600"
|
||||
/>
|
||||
<span className="w-8 text-right font-mono">{Math.round(fontScale * 100)}%</span>
|
||||
</label>
|
||||
<label className="flex items-center gap-2 text-xs text-gray-600 dark:text-gray-400">
|
||||
Einrueckung
|
||||
<input
|
||||
type="range" min={0} max={20} step={0.5} value={leftPaddingPct}
|
||||
onChange={e => setLeftPaddingPct(Number(e.target.value))}
|
||||
className="w-24 h-1 accent-teal-600"
|
||||
/>
|
||||
<span className="w-8 text-right font-mono">{leftPaddingPct}%</span>
|
||||
</label>
|
||||
<button
|
||||
onClick={() => setGlobalBold(b => !b)}
|
||||
className={`px-2 py-1 text-xs rounded border transition-colors font-bold ${
|
||||
globalBold
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
>
|
||||
B
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* 2-column layout: Image + Table/Overlay */}
|
||||
<div className={`grid gap-4 ${viewMode === 'overlay' ? 'grid-cols-2' : 'grid-cols-3'}`}>
|
||||
{/* Left: Dewarped Image with highlight overlay */}
|
||||
<div className="col-span-1">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Originalbild
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative sticky top-4">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Dewarped"
|
||||
className="w-full h-auto"
|
||||
onLoad={(e) => {
|
||||
const img = e.target as HTMLImageElement
|
||||
setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight })
|
||||
}}
|
||||
/>
|
||||
{/* Highlight overlay for active row */}
|
||||
{activeEntry?.bbox && (
|
||||
<div
|
||||
className="absolute border-2 border-yellow-400 bg-yellow-400/20 pointer-events-none animate-pulse"
|
||||
style={{
|
||||
left: `${activeEntry.bbox.x}%`,
|
||||
top: `${activeEntry.bbox.y}%`,
|
||||
width: `${activeEntry.bbox.w}%`,
|
||||
height: `${activeEntry.bbox.h}%`,
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right: Table or Overlay */}
|
||||
<div className={viewMode === 'table' ? 'col-span-2' : 'col-span-1'} ref={tableRef}>
|
||||
{viewMode === 'table' ? (
|
||||
<>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
{columnsUsed.length === 1 && columnsUsed[0]?.type === 'column_text' ? 'Tabelle' : 'Vokabeltabelle'} ({vocabEntries.length} Eintraege)
|
||||
</div>
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
|
||||
<div className="max-h-[70vh] overflow-y-auto">
|
||||
<table className="w-full text-sm">
|
||||
<thead className="sticky top-0 z-10">
|
||||
<tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
|
||||
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium w-10">#</th>
|
||||
{columnsUsed.length > 0 ? (
|
||||
columnsUsed.map((col, i) => {
|
||||
const field = COL_TYPE_TO_FIELD[col.type]
|
||||
if (!field) return null
|
||||
return (
|
||||
<th key={i} className={`px-2 py-2 text-left font-medium ${COL_TYPE_COLOR[col.type] || 'text-gray-500 dark:text-gray-400'}`}>
|
||||
{FIELD_LABELS[field] || field}
|
||||
</th>
|
||||
)
|
||||
})
|
||||
) : (
|
||||
<>
|
||||
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">EN</th>
|
||||
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">DE</th>
|
||||
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Beispiel</th>
|
||||
</>
|
||||
)}
|
||||
<th className="px-2 py-2 text-center text-gray-500 dark:text-gray-400 font-medium w-16">Status</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{vocabEntries.map((entry, idx) => {
|
||||
const rowStatus = getRowStatus(idx)
|
||||
const rowChanges = correctedMap.get(idx)
|
||||
|
||||
const rowBg = {
|
||||
pending: '',
|
||||
active: 'bg-yellow-50 dark:bg-yellow-900/20',
|
||||
reviewed: '',
|
||||
corrected: 'bg-teal-50/50 dark:bg-teal-900/10',
|
||||
skipped: 'bg-gray-50 dark:bg-gray-800/50',
|
||||
}[rowStatus]
|
||||
|
||||
return (
|
||||
<tr
|
||||
key={idx}
|
||||
ref={rowStatus === 'active' ? activeRowRef : undefined}
|
||||
className={`border-b border-gray-100 dark:border-gray-700/50 ${rowBg} ${
|
||||
rowStatus === 'active' ? 'ring-1 ring-yellow-400 ring-inset' : ''
|
||||
}`}
|
||||
>
|
||||
<td className="px-2 py-1.5 text-gray-400 font-mono text-xs">{idx}</td>
|
||||
{columnsUsed.length > 0 ? (
|
||||
columnsUsed.map((col, i) => {
|
||||
const field = COL_TYPE_TO_FIELD[col.type]
|
||||
if (!field) return null
|
||||
const text = (entry as Record<string, unknown>)[field] as string || ''
|
||||
return (
|
||||
<td key={i} className="px-2 py-1.5 text-xs">
|
||||
<CellContent text={text} field={field} rowChanges={rowChanges} />
|
||||
</td>
|
||||
)
|
||||
})
|
||||
) : (
|
||||
<>
|
||||
<td className="px-2 py-1.5">
|
||||
<CellContent text={entry.english} field="english" rowChanges={rowChanges} />
|
||||
</td>
|
||||
<td className="px-2 py-1.5">
|
||||
<CellContent text={entry.german} field="german" rowChanges={rowChanges} />
|
||||
</td>
|
||||
<td className="px-2 py-1.5 text-xs">
|
||||
<CellContent text={entry.example} field="example" rowChanges={rowChanges} />
|
||||
</td>
|
||||
</>
|
||||
)}
|
||||
<td className="px-2 py-1.5 text-center">
|
||||
<StatusIcon status={rowStatus} />
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Text-Rekonstruktion ({cells.filter(c => c.text).length} Zellen)
|
||||
</div>
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-white dark:bg-white">
|
||||
<div
|
||||
ref={reconRef}
|
||||
className="relative"
|
||||
style={{
|
||||
aspectRatio: imageNaturalSize ? `${imageNaturalSize.w} / ${imageNaturalSize.h}` : '3 / 4',
|
||||
}}
|
||||
>
|
||||
{cells.map(cell => {
|
||||
if (!cell.bbox_pct || !cell.text) return null
|
||||
const col = colPositions.get(cell.col_index)
|
||||
const cellX = col?.x ?? cell.bbox_pct.x
|
||||
const cellW = col?.w ?? cell.bbox_pct.w
|
||||
const aspect = imageNaturalSize ? imageNaturalSize.h / imageNaturalSize.w : 4 / 3
|
||||
const containerH = reconWidth * aspect
|
||||
const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
|
||||
|
||||
const wordPos = cellWordPositions.get(cell.cell_id)
|
||||
|
||||
// Pixel-analysed: render word-groups at detected positions
|
||||
if (wordPos) {
|
||||
return wordPos.map((wp, i) => {
|
||||
// Auto font-size from pixel analysis, scaled by user slider
|
||||
const autoFontPx = cellHeightPx * wp.fontRatio * fontScale
|
||||
const fs = Math.max(6, autoFontPx)
|
||||
return (
|
||||
<span
|
||||
key={`${cell.cell_id}_${i}`}
|
||||
className="absolute leading-none pointer-events-none select-none"
|
||||
style={{
|
||||
left: `${wp.xPct}%`,
|
||||
top: `${cell.bbox_pct.y}%`,
|
||||
width: `${wp.wPct}%`,
|
||||
height: `${cell.bbox_pct.h}%`,
|
||||
fontSize: `${fs}px`,
|
||||
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
whiteSpace: 'nowrap',
|
||||
overflow: 'visible',
|
||||
color: '#1a1a1a',
|
||||
}}
|
||||
>
|
||||
{wp.text}
|
||||
</span>
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// Fallback: no pixel data — single span for entire cell
|
||||
const fontSize = Math.max(6, cellHeightPx * fontScale)
|
||||
return (
|
||||
<span
|
||||
key={cell.cell_id}
|
||||
className="absolute leading-none pointer-events-none select-none"
|
||||
style={{
|
||||
left: `${cellX}%`,
|
||||
top: `${cell.bbox_pct.y}%`,
|
||||
width: `${cellW}%`,
|
||||
height: `${cell.bbox_pct.h}%`,
|
||||
fontSize: `${fontSize}px`,
|
||||
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
|
||||
paddingLeft: `${leftPaddingPct}%`,
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
whiteSpace: 'pre',
|
||||
overflow: 'visible',
|
||||
color: '#1a1a1a',
|
||||
}}
|
||||
>
|
||||
{cell.text}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Done state: summary + actions */}
|
||||
{status === 'done' && (
|
||||
<div className="space-y-4">
|
||||
{/* Summary */}
|
||||
<div className="bg-gray-50 dark:bg-gray-800/50 rounded-lg p-3 text-xs text-gray-500 dark:text-gray-400">
|
||||
{changes.length === 0 ? (
|
||||
<span>Keine Korrekturen noetig — alle Eintraege sind korrekt.</span>
|
||||
) : (
|
||||
<span>
|
||||
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden ·{' '}
|
||||
{accepted.size} ausgewaehlt ·{' '}
|
||||
{meta?.skipped || 0} uebersprungen (Lautschrift) ·{' '}
|
||||
{totalDuration}ms
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Corrections detail list (if any) */}
|
||||
{changes.length > 0 && (
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
|
||||
<div className="bg-gray-50 dark:bg-gray-800 px-3 py-2 border-b border-gray-200 dark:border-gray-700">
|
||||
<span className="text-xs font-medium text-gray-600 dark:text-gray-400">
|
||||
Korrekturvorschlaege ({accepted.size}/{changes.length} ausgewaehlt)
|
||||
</span>
|
||||
</div>
|
||||
<table className="w-full text-sm">
|
||||
<thead>
|
||||
<tr className="bg-gray-50/50 dark:bg-gray-800/50 border-b border-gray-200 dark:border-gray-700">
|
||||
<th className="w-10 px-3 py-1.5 text-center">
|
||||
<input type="checkbox" checked={accepted.size === changes.length} onChange={toggleAll}
|
||||
className="rounded border-gray-300 dark:border-gray-600" />
|
||||
</th>
|
||||
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Zeile</th>
|
||||
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Feld</th>
|
||||
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Vorher</th>
|
||||
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Nachher</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{changes.map((change, idx) => (
|
||||
<tr key={idx} className={`border-b border-gray-100 dark:border-gray-700/50 ${
|
||||
accepted.has(idx) ? 'bg-teal-50/50 dark:bg-teal-900/10' : ''
|
||||
}`}>
|
||||
<td className="px-3 py-1.5 text-center">
|
||||
<input type="checkbox" checked={accepted.has(idx)} onChange={() => toggleChange(idx)}
|
||||
className="rounded border-gray-300 dark:border-gray-600" />
|
||||
</td>
|
||||
<td className="px-2 py-1.5 text-gray-500 dark:text-gray-400 font-mono text-xs">R{change.row_index}</td>
|
||||
<td className="px-2 py-1.5">
|
||||
<span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
|
||||
{FIELD_LABELS[change.field] || change.field}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-2 py-1.5"><span className="line-through text-red-500 dark:text-red-400 text-xs">{change.old}</span></td>
|
||||
<td className="px-2 py-1.5"><span className="text-green-600 dark:text-green-400 font-medium text-xs">{change.new}</span></td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Actions */}
|
||||
<div className="flex items-center justify-between pt-2">
|
||||
<p className="text-xs text-gray-400">
|
||||
{changes.length > 0 ? `${accepted.size} von ${changes.length} ausgewaehlt` : ''}
|
||||
</p>
|
||||
<div className="flex gap-3">
|
||||
{changes.length > 0 && (
|
||||
<button onClick={onNext}
|
||||
className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
|
||||
Alle ablehnen
|
||||
</button>
|
||||
)}
|
||||
{changes.length > 0 ? (
|
||||
<button onClick={applyChanges} disabled={applying || accepted.size === 0}
|
||||
className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium">
|
||||
{applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
|
||||
</button>
|
||||
) : (
|
||||
<button onClick={onNext}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||
Weiter →
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Content of one table cell, with an inline before/after diff when a
 * correction exists for the cell's field.
 *
 * @param text       - Current text of the cell.
 * @param field      - Field name used to look up a matching change.
 * @param rowChanges - Changes for the row this cell belongs to, if any.
 */
function CellContent({ text, field, rowChanges }: {
  text: string
  field: string
  rowChanges?: LlmChange[]
}) {
  const fieldChange = rowChanges?.find(candidate => candidate.field === field)

  // A correction takes precedence: old value struck through, new value highlighted.
  if (fieldChange) {
    return (
      <span>
        <span className="line-through text-red-400 dark:text-red-500 text-xs mr-1">{fieldChange.old}</span>
        <span className="text-green-600 dark:text-green-400 font-medium text-xs">{fieldChange.new}</span>
      </span>
    )
  }

  // No correction: plain text, or a muted dash placeholder for an empty cell.
  return text
    ? <span className="text-gray-700 dark:text-gray-300 text-xs">{text}</span>
    : <span className="text-gray-300 dark:text-gray-600">—</span>
}
|
||||
|
||||
/**
 * Status indicator for a single table row.
 *
 * Visual marker per status:
 * - `pending`   : muted dash
 * - `active`    : pulsing yellow dot (tooltip "Wird geprueft")
 * - `reviewed`  : green check mark
 * - `corrected` : teal "korr." badge
 * - `skipped`   : gray "skip" badge
 */
function StatusIcon({ status }: { status: RowStatus }) {
  switch (status) {
    case 'pending': {
      return <span className="text-gray-300 dark:text-gray-600 text-xs">—</span>
    }
    case 'active': {
      return <span className="inline-block w-3 h-3 rounded-full bg-yellow-400 animate-pulse" title="Wird geprueft" />
    }
    case 'reviewed': {
      return (
        <svg className="w-4 h-4 text-green-500 inline-block" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
          <path strokeLinecap="round" strokeLinejoin="round" d="M5 13l4 4L19 7" />
        </svg>
      )
    }
    case 'corrected': {
      return (
        <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-teal-100 dark:bg-teal-900/30 text-teal-700 dark:text-teal-400">
          korr.
        </span>
      )
    }
    case 'skipped': {
      return (
        <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400">
          skip
        </span>
      )
    }
  }
}
|
||||
@@ -1,247 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepOrientationProps {
|
||||
sessionId?: string | null
|
||||
onNext: (sessionId: string) => void
|
||||
}
|
||||
|
||||
/**
 * Pipeline step that uploads a document and shows its detected orientation.
 *
 * While no session is loaded, the component renders an optional session-name
 * field plus a drag-and-drop / file-picker upload area. After a successful
 * upload it immediately requests orientation detection for the new session and
 * then shows the original next to the oriented image, a result badge and a
 * "Weiter" button.
 *
 * @param sessionId - Id of an already existing session; while no session is
 *   loaded locally its data is fetched from the API.
 * @param onNext - Called with the session id when the user clicks "Weiter".
 */
export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOrientationProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [orientationResult, setOrientationResult] = useState<OrientationResult | null>(null)
  const [uploading, setUploading] = useState(false)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [dragOver, setDragOver] = useState(false)
  const [sessionName, setSessionName] = useState('')

  // Reload session data when navigating back
  useEffect(() => {
    if (!existingSessionId || session) return

    // Lets the cleanup cancel the request so a late response can neither update
    // an unmounted component nor overwrite a session that was set meanwhile.
    const controller = new AbortController()

    const loadSession = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}`, {
          signal: controller.signal,
        })
        if (!res.ok) return
        const data = await res.json()
        if (controller.signal.aborted) return

        const sessionInfo: SessionInfo = {
          session_id: data.session_id,
          filename: data.filename,
          image_width: data.image_width,
          image_height: data.image_height,
          original_image_url: `${KLAUSUR_API}${data.original_image_url}`,
        }
        setSession(sessionInfo)

        if (data.orientation_result) {
          setOrientationResult(data.orientation_result)
        }
      } catch (e) {
        // An aborted request is expected during cleanup and not worth logging.
        if (controller.signal.aborted) return
        console.error('Failed to reload session:', e)
      }
    }

    loadSession()

    return () => controller.abort()
  }, [existingSessionId, session])

  /** Uploads `file` as a new session and then triggers orientation detection. */
  const handleUpload = useCallback(async (file: File) => {
    // Ignore further files (e.g. a second drop) while an upload is in flight.
    if (uploading) return

    setUploading(true)
    setError(null)
    setOrientationResult(null)

    try {
      const formData = new FormData()
      formData.append('file', file)
      const trimmedName = sessionName.trim()
      if (trimmedName) {
        formData.append('name', trimmedName)
      }

      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
        method: 'POST',
        body: formData,
      })

      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        // Only a non-empty string is usable as a message; anything else
        // (missing, object, array) falls back to the generic text.
        const detail = typeof err?.detail === 'string' && err.detail ? err.detail : 'Upload fehlgeschlagen'
        throw new Error(detail)
      }

      const data: SessionInfo = await res.json()
      data.original_image_url = `${KLAUSUR_API}${data.original_image_url}`
      setSession(data)

      // Auto-trigger orientation detection
      setDetecting(true)
      const orientRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/orientation`, {
        method: 'POST',
      })

      if (!orientRes.ok) {
        throw new Error('Orientierungserkennung fehlgeschlagen')
      }

      const orientData = await orientRes.json()
      setOrientationResult({
        orientation_degrees: orientData.orientation_degrees,
        corrected: orientData.corrected,
        duration_seconds: orientData.duration_seconds,
      })
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setUploading(false)
      setDetecting(false)
    }
  }, [sessionName, uploading])

  /** Starts an upload for the first file dropped onto the upload area. */
  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault()
    setDragOver(false)
    const file = e.dataTransfer.files[0]
    if (file) handleUpload(file)
  }, [handleUpload])

  /** Starts an upload for the file chosen through the file picker. */
  const handleFileInput = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0]
    // Clear the input so choosing the same file again (e.g. after a failed
    // upload) still fires a change event.
    e.target.value = ''
    if (file) handleUpload(file)
  }, [handleUpload])

  // Upload area (no session yet)
  if (!session) {
    return (
      <div className="space-y-4">
        {/* Session name input */}
        <div>
          <label className="block text-sm font-medium text-gray-600 dark:text-gray-400 mb-1">
            Session-Name (optional)
          </label>
          <input
            type="text"
            value={sessionName}
            onChange={(e) => setSessionName(e.target.value)}
            placeholder="z.B. Unit 3 Seite 42"
            className="w-full max-w-sm px-3 py-2 text-sm border rounded-lg dark:bg-gray-800 dark:border-gray-600 dark:text-gray-200 focus:outline-none focus:ring-2 focus:ring-teal-500"
          />
        </div>

        <div
          onDragOver={(e) => { e.preventDefault(); setDragOver(true) }}
          onDragLeave={() => setDragOver(false)}
          onDrop={handleDrop}
          className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
            dragOver
              ? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
              : 'border-gray-300 dark:border-gray-600 hover:border-teal-400'
          }`}
        >
          {uploading ? (
            <div className="text-gray-500">
              <div className="animate-spin inline-block w-8 h-8 border-2 border-teal-500 border-t-transparent rounded-full mb-3" />
              <p>Wird hochgeladen...</p>
            </div>
          ) : (
            <>
              <div className="text-4xl mb-3">📄</div>
              <p className="text-gray-600 dark:text-gray-400 mb-2">
                PDF oder Bild hierher ziehen
              </p>
              <p className="text-sm text-gray-400 mb-4">oder</p>
              <label className="inline-block px-4 py-2 bg-teal-600 text-white rounded-lg cursor-pointer hover:bg-teal-700 transition-colors">
                Datei auswaehlen
                <input
                  type="file"
                  accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
                  onChange={handleFileInput}
                  className="hidden"
                />
              </label>
            </>
          )}
        </div>
        {error && (
          <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
            {error}
          </div>
        )}
      </div>
    )
  }

  // Session active: show orientation result
  const orientedUrl = orientationResult
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/image/oriented`
    : null

  return (
    <div className="space-y-4">
      {/* Filename */}
      <div className="text-sm text-gray-500 dark:text-gray-400">
        Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
        {' '}({session.image_width} x {session.image_height} px)
      </div>

      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Orientierung wird erkannt...
        </div>
      )}

      {/* Image comparison */}
      <ImageCompareView
        originalUrl={session.original_image_url}
        deskewedUrl={orientedUrl}
        showGrid={false}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Original"
        rightLabel="Orientiert"
      />

      {/* Orientation result badge */}
      {orientationResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex items-center gap-3 text-sm">
            {orientationResult.corrected ? (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
                🔄 {orientationResult.orientation_degrees}° korrigiert
              </span>
            ) : (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-400 text-xs font-medium">
                ✓ 0° (keine Drehung noetig)
              </span>
            )}
            <span className="text-gray-400 text-xs">
              {orientationResult.duration_seconds}s
            </span>
          </div>
        </div>
      )}

      {/* Next button */}
      {orientationResult && (
        <div className="flex justify-end">
          <button
            onClick={() => onNext(session.session_id)}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter →
          </button>
        </div>
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,263 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { RowResult, RowGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepRowDetectionProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
export function StepRowDetection({ sessionId, onNext }: StepRowDetectionProps) {
|
||||
const [rowResult, setRowResult] = useState<RowResult | null>(null)
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [gtNotes, setGtNotes] = useState('')
|
||||
const [gtSaved, setGtSaved] = useState(false)
|
||||
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
|
||||
const fetchSession = async () => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||
if (res.ok) {
|
||||
const info = await res.json()
|
||||
if (info.row_result) {
|
||||
setRowResult(info.row_result)
|
||||
return
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to fetch session info:', e)
|
||||
}
|
||||
// No cached result — run auto
|
||||
runAutoDetection()
|
||||
}
|
||||
|
||||
fetchSession()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
const runAutoDetection = useCallback(async () => {
|
||||
if (!sessionId) return
|
||||
setDetecting(true)
|
||||
setError(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/rows`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (!res.ok) {
|
||||
const err = await res.json().catch(() => ({ detail: res.statusText }))
|
||||
throw new Error(err.detail || 'Zeilenerkennung fehlgeschlagen')
|
||||
}
|
||||
const data: RowResult = await res.json()
|
||||
setRowResult(data)
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setDetecting(false)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
|
||||
if (!sessionId) return
|
||||
const gt: RowGroundTruth = {
|
||||
is_correct: isCorrect,
|
||||
notes: gtNotes || undefined,
|
||||
}
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/rows`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(gt),
|
||||
})
|
||||
setGtSaved(true)
|
||||
} catch (e) {
|
||||
console.error('Ground truth save failed:', e)
|
||||
}
|
||||
}, [sessionId, gtNotes])
|
||||
|
||||
if (!sessionId) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">📏</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 4: Zeilenerkennung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Bitte zuerst Schritte 1-3 abschliessen.
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/rows-overlay`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
|
||||
const rowTypeColors: Record<string, string> = {
|
||||
header: 'bg-gray-200 dark:bg-gray-600 text-gray-700 dark:text-gray-300',
|
||||
content: 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300',
|
||||
footer: 'bg-gray-200 dark:bg-gray-600 text-gray-700 dark:text-gray-300',
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Loading */}
|
||||
{detecting && (
|
||||
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||
Zeilenerkennung laeuft...
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Images: overlay vs clean */}
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Mit Zeilen-Overlay
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{rowResult ? (
|
||||
// eslint-disable-next-line @next/next/no-img-element
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Zeilen-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
) : (
|
||||
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||
{detecting ? 'Erkenne Zeilen...' : 'Keine Daten'}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Entzerrtes Bild
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Entzerrt"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Row summary */}
|
||||
{rowResult && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Ergebnis: {rowResult.total_rows} Zeilen erkannt
|
||||
</h4>
|
||||
<span className="text-xs text-gray-400">
|
||||
{rowResult.duration_seconds}s
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Type summary badges */}
|
||||
<div className="flex gap-2">
|
||||
{Object.entries(rowResult.summary).map(([type, count]) => (
|
||||
<span
|
||||
key={type}
|
||||
className={`px-2 py-0.5 rounded text-xs font-medium ${rowTypeColors[type] || 'bg-gray-100 text-gray-600'}`}
|
||||
>
|
||||
{type}: {count}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Row list */}
|
||||
<div className="max-h-64 overflow-y-auto space-y-1">
|
||||
{rowResult.rows.map((row) => (
|
||||
<div
|
||||
key={row.index}
|
||||
className={`flex items-center gap-3 px-3 py-1.5 rounded text-xs font-mono ${
|
||||
row.row_type === 'header' || row.row_type === 'footer'
|
||||
? 'bg-gray-50 dark:bg-gray-700/50 text-gray-500'
|
||||
: 'text-gray-600 dark:text-gray-400'
|
||||
}`}
|
||||
>
|
||||
<span className="w-8 text-right text-gray-400">R{row.index}</span>
|
||||
<span className={`px-1.5 py-0.5 rounded text-[10px] uppercase font-semibold ${rowTypeColors[row.row_type] || ''}`}>
|
||||
{row.row_type}
|
||||
</span>
|
||||
<span>y={row.y}</span>
|
||||
<span>h={row.height}px</span>
|
||||
<span>{row.word_count} Woerter</span>
|
||||
{row.gap_before > 0 && (
|
||||
<span className="text-gray-400">gap={row.gap_before}px</span>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Controls */}
|
||||
{rowResult && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center gap-3">
|
||||
<button
|
||||
onClick={() => runAutoDetection()}
|
||||
disabled={detecting}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-50"
|
||||
>
|
||||
Erneut erkennen
|
||||
</button>
|
||||
|
||||
<div className="flex-1" />
|
||||
|
||||
{/* Ground truth */}
|
||||
{!gtSaved ? (
|
||||
<>
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Notizen (optional)"
|
||||
value={gtNotes}
|
||||
onChange={(e) => setGtNotes(e.target.value)}
|
||||
className="px-2 py-1 text-xs border rounded dark:bg-gray-700 dark:border-gray-600 w-48"
|
||||
/>
|
||||
<button
|
||||
onClick={() => handleGroundTruth(true)}
|
||||
className="px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700"
|
||||
>
|
||||
Korrekt
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleGroundTruth(false)}
|
||||
className="px-3 py-1.5 text-xs bg-red-600 text-white rounded-lg hover:bg-red-700"
|
||||
>
|
||||
Fehlerhaft
|
||||
</button>
|
||||
</>
|
||||
) : (
|
||||
<span className="text-xs text-green-600 dark:text-green-400">
|
||||
Ground Truth gespeichert
|
||||
</span>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium"
|
||||
>
|
||||
Weiter
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,339 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
// Path prefix prepended to every OCR-pipeline request URL built in this file.
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props accepted by the structure-detection step component. */
interface StepStructureDetectionProps {
  /**
   * Id of the OCR pipeline session that is interpolated into the request
   * and image URLs. While it is `null` the step only renders a placeholder.
   */
  sessionId: string | null

  /** Called when the user clicks the "Weiter" button. */
  onNext: () => void
}
|
||||
|
||||
/**
 * Swatch colour (hex) for each colour name that can appear in the
 * detected-colour list. Names that are not listed here are expected to be
 * handled by the caller with its own fallback colour.
 */
const COLOR_HEX: Record<string, string> = {
  red: '#dc2626',
  orange: '#ea580c',
  yellow: '#ca8a04',
  green: '#16a34a',
  blue: '#2563eb',
  purple: '#9333ea',
}
|
||||
|
||||
/** Glyph rendered in front of each shape name in the per-shape summary. */
const SHAPE_GLYPHS = new Map<string, string>([
  ['arrow', '→'],
  ['circle', '●'],
  ['line', '─'],
  ['exclamation', '❗'],
  ['dot', '•'],
  ['illustration', '🖼'],
])

/** Glyph used for any shape name that has no entry in SHAPE_GLYPHS. */
const DEFAULT_SHAPE_GLYPH = '◆'

/**
 * POSTs to the detect-structure endpoint of a session and returns the parsed
 * JSON body.
 *
 * @param sessionId       session whose structure should be detected
 * @param failureMessage  message of the Error thrown on a non-OK response
 * @throws Error with `failureMessage` when the response status is not OK;
 *         network and JSON parsing errors propagate unchanged
 */
async function requestStructureDetection(sessionId: string, failureMessage: string): Promise<StructureResult> {
  const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
    method: 'POST',
  })
  if (!res.ok) {
    throw new Error(failureMessage)
  }
  return res.json()
}

/**
 * Wizard step that shows the cropped document next to the structure overlay
 * and lists the detected zones, boxes, graphics and colours.
 *
 * On first render with a session id it loads the session; if the session
 * already carries a `structure_result` that result is shown, otherwise a
 * detection is triggered. "Erneut erkennen" always triggers a new detection.
 *
 * @param sessionId  session to work on; `null` renders a placeholder only
 * @param onNext     called when the user clicks "Weiter"
 */
export function StepStructureDetection({ sessionId, onNext }: StepStructureDetectionProps) {
  const [result, setResult] = useState<StructureResult | null>(null)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [hasRun, setHasRun] = useState(false)
  // Timestamp appended to the overlay URL so a new result bypasses the image cache.
  const [overlayTs, setOverlayTs] = useState(0)

  // Auto-trigger detection on mount
  useEffect(() => {
    if (!sessionId || hasRun) return
    setHasRun(true)

    // Const copy so the narrowed (non-null) type is kept inside the closure.
    const activeSessionId = sessionId

    const runDetection = async () => {
      setDetecting(true)
      setError(null)

      try {
        // Check if session already has structure result
        const sessionRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${activeSessionId}`)
        if (sessionRes.ok) {
          const sessionData = await sessionRes.json()
          if (sessionData.structure_result) {
            setResult(sessionData.structure_result)
            setOverlayTs(Date.now())
            // `finally` below clears the loading flag.
            return
          }
        }

        const data = await requestStructureDetection(activeSessionId, 'Strukturerkennung fehlgeschlagen')
        setResult(data)
        setOverlayTs(Date.now())
      } catch (e) {
        setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        setDetecting(false)
      }
    }

    runDetection()
  }, [sessionId, hasRun])

  /** Re-runs the detection regardless of any result stored on the session. */
  const handleRerun = async () => {
    if (!sessionId) return
    setDetecting(true)
    setError(null)
    try {
      const data = await requestStructureDetection(sessionId, 'Erneute Erkennung fehlgeschlagen')
      setResult(data)
      setOverlayTs(Date.now())
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
  }

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  const croppedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
  const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/structure-overlay${overlayTs ? `?t=${overlayTs}` : ''}`

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Dokumentstruktur wird analysiert...
        </div>
      )}

      {/* Two-column image comparison */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
        {/* Left: Original document */}
        <div className="space-y-2">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
            Original
          </div>
          <div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              src={croppedUrl}
              alt="Originaldokument"
              className="w-full h-full object-contain"
              onError={(e) => {
                (e.target as HTMLImageElement).style.display = 'none'
              }}
            />
          </div>
        </div>

        {/* Right: Structure overlay */}
        <div className="space-y-2">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
            Erkannte Struktur
          </div>
          <div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
            {/*
              Keyed by URL: the onError handler hides the DOM node directly, so
              without a fresh element a failed first load (before any result
              exists) would keep the overlay hidden after the URL changes.
            */}
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              key={overlayUrl}
              src={overlayUrl}
              alt="Strukturerkennung"
              className="w-full h-full object-contain"
              onError={(e) => {
                (e.target as HTMLImageElement).style.display = 'none'
              }}
            />
          </div>
        </div>
      </div>

      {/* Result info */}
      {result && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4 space-y-3">
          {/* Summary badges */}
          <div className="flex flex-wrap items-center gap-3 text-sm">
            <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-teal-50 dark:bg-teal-900/20 text-teal-700 dark:text-teal-400 text-xs font-medium">
              {result.zones.length} Zone(n)
            </span>
            <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
              {result.boxes.length} Box(en)
            </span>
            {result.graphics && result.graphics.length > 0 && (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-400 text-xs font-medium">
                {result.graphics.length} Grafik(en)
              </span>
            )}
            {result.has_words && (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-400 text-xs font-medium">
                {result.word_count} Woerter
              </span>
            )}
            {(result.border_ghosts_removed ?? 0) > 0 && (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
                {result.border_ghosts_removed} Rahmenlinien entfernt
              </span>
            )}
            <span className="text-gray-400 text-xs ml-auto">
              {result.image_width}x{result.image_height}px | {result.duration_seconds}s
            </span>
          </div>

          {/* Boxes detail */}
          {result.boxes.length > 0 && (
            <div>
              <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Erkannte Boxen</h4>
              <div className="space-y-1.5">
                {result.boxes.map((box, i) => (
                  <div key={i} className="flex items-center gap-3 text-xs">
                    <span
                      className="w-3 h-3 rounded-sm flex-shrink-0 border border-gray-300 dark:border-gray-600"
                      style={{ backgroundColor: box.bg_color_hex || '#6b7280' }}
                    />
                    <span className="text-gray-600 dark:text-gray-400">
                      Box {i + 1}:
                    </span>
                    <span className="font-mono text-gray-500">
                      {box.w}x{box.h}px @ ({box.x}, {box.y})
                    </span>
                    {box.bg_color_name && box.bg_color_name !== 'unknown' && box.bg_color_name !== 'white' && (
                      <span className="px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-500">
                        {box.bg_color_name}
                      </span>
                    )}
                    {box.border_thickness > 0 && (
                      <span className="text-gray-400">
                        Rahmen: {box.border_thickness}px
                      </span>
                    )}
                    <span className="text-gray-400">
                      {Math.round(box.confidence * 100)}%
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}

          {/* Zones detail */}
          <div>
            <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Seitenzonen</h4>
            <div className="flex flex-wrap gap-2">
              {result.zones.map((zone) => (
                <span
                  key={zone.index}
                  className={`inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] font-medium ${
                    zone.zone_type === 'box'
                      ? 'bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-800'
                      : 'bg-gray-50 dark:bg-gray-800 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-700'
                  }`}
                >
                  {zone.zone_type === 'box' ? 'Box' : 'Inhalt'} {zone.index}
                  <span className="text-[10px] font-normal opacity-70">
                    ({zone.w}x{zone.h})
                  </span>
                </span>
              ))}
            </div>
          </div>

          {/* Graphics / visual elements */}
          {result.graphics && result.graphics.length > 0 && (
            <div>
              <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
                Graphische Elemente ({result.graphics.length})
              </h4>
              {/* Summary by shape, most frequent first */}
              {(() => {
                const shapeCounts: Record<string, number> = {}
                for (const g of result.graphics) {
                  shapeCounts[g.shape] = (shapeCounts[g.shape] || 0) + 1
                }
                return (
                  <div className="flex flex-wrap gap-2 mb-2">
                    {Object.entries(shapeCounts)
                      .sort(([, a], [, b]) => b - a)
                      .map(([shape, count]) => (
                        <span
                          key={shape}
                          className="inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-300 border border-purple-200 dark:border-purple-800"
                        >
                          {SHAPE_GLYPHS.get(shape) ?? DEFAULT_SHAPE_GLYPH}
                          {' '}{shape} <span className="font-semibold">×{count}</span>
                        </span>
                      ))}
                  </div>
                )
              })()}
              {/* Individual graphics list */}
              <div className="space-y-1.5 max-h-40 overflow-y-auto">
                {result.graphics.map((g, i) => (
                  <div key={i} className="flex items-center gap-3 text-xs">
                    <span
                      className="w-3 h-3 rounded-full flex-shrink-0 border border-gray-300 dark:border-gray-600"
                      style={{ backgroundColor: g.color_hex || '#6b7280' }}
                    />
                    <span className="text-gray-600 dark:text-gray-400 font-medium min-w-[60px]">
                      {g.shape}
                    </span>
                    <span className="font-mono text-gray-500">
                      {g.w}x{g.h}px @ ({g.x}, {g.y})
                    </span>
                    <span className="text-gray-400">
                      {g.color_name}
                    </span>
                    <span className="text-gray-400">
                      {Math.round(g.confidence * 100)}%
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}

          {/* Color regions, largest pixel count first */}
          {Object.keys(result.color_pixel_counts).length > 0 && (
            <div>
              <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Erkannte Farben</h4>
              <div className="flex flex-wrap gap-2">
                {Object.entries(result.color_pixel_counts)
                  .sort(([, a], [, b]) => b - a)
                  .map(([name, count]) => (
                    <span key={name} className="inline-flex items-center gap-1.5 px-2 py-1 rounded text-[11px] bg-gray-50 dark:bg-gray-800 border border-gray-200 dark:border-gray-700">
                      <span
                        className="w-2.5 h-2.5 rounded-full"
                        style={{ backgroundColor: COLOR_HEX[name] || '#6b7280' }}
                      />
                      <span className="text-gray-600 dark:text-gray-400">{name}</span>
                      <span className="text-gray-400 text-[10px]">{count.toLocaleString()}px</span>
                    </span>
                  ))}
              </div>
            </div>
          )}
        </div>
      )}

      {/* Action buttons */}
      {result && (
        <div className="flex justify-between">
          <button
            onClick={handleRerun}
            disabled={detecting}
            className="px-4 py-2 text-sm text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 transition-colors disabled:opacity-50"
          >
            Erneut erkennen
          </button>
          <button
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter →
          </button>
        </div>
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
@@ -1,936 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
// Path prefix prepended to every OCR-pipeline request URL built in this file.
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/**
 * Renders `text`, turning every `\n` into a `<br />`.
 * An empty string is shown as a muted em dash; text without a newline is
 * returned as-is inside a fragment.
 */
function MultilineText({ text }: { text: string }) {
  if (!text) {
    return <span className="text-gray-300 dark:text-gray-600">—</span>
  }

  const segments = text.split('\n')
  if (segments.length === 1) {
    return <>{text}</>
  }

  const lastIdx = segments.length - 1
  return (
    <>
      {segments.map((segment, idx) => (
        <span key={idx}>
          {segment}
          {idx < lastIdx ? <br /> : null}
        </span>
      ))}
    </>
  )
}
|
||||
|
||||
/** Header text for each known column type. */
const COL_TYPE_LABELS = new Map<string, string>([
  ['column_en', 'English'],
  ['column_de', 'Deutsch'],
  ['column_example', 'Example'],
  ['column_text', 'Text'],
  ['column_marker', 'Marker'],
  ['page_ref', 'Seite'],
])

/**
 * Column type → human-readable header.
 *
 * Unknown types fall back to the type name with its first `column_`
 * occurrence removed. The lookup uses a Map so that names which happen to
 * match inherited object members (`constructor`, `toString`, …) take the
 * fallback path instead of returning a non-string value.
 *
 * @param colType  column type identifier, e.g. `column_en`
 * @returns the label to display for that column
 */
function colTypeLabel(colType: string): string {
  return COL_TYPE_LABELS.get(colType) ?? colType.replace('column_', '')
}
|
||||
|
||||
/** Text colour classes for each known column type. */
const COL_TYPE_COLORS = new Map<string, string>([
  ['column_en', 'text-blue-600 dark:text-blue-400'],
  ['column_de', 'text-green-600 dark:text-green-400'],
  ['column_example', 'text-orange-600 dark:text-orange-400'],
  ['column_text', 'text-purple-600 dark:text-purple-400'],
  ['column_marker', 'text-gray-500 dark:text-gray-400'],
])

/** Colour classes used for any column type without its own entry. */
const COL_TYPE_COLOR_FALLBACK = 'text-gray-600 dark:text-gray-400'

/**
 * Column type → color class.
 *
 * The lookup uses a Map so that names which happen to match inherited object
 * members (`constructor`, `toString`, …) get the fallback classes instead of
 * a non-string value.
 *
 * @param colType  column type identifier, e.g. `column_en`
 * @returns the CSS class string for that column's text colour
 */
function colTypeColor(colType: string): string {
  return COL_TYPE_COLORS.get(colType) ?? COL_TYPE_COLOR_FALLBACK
}
|
||||
|
||||
/** Props accepted by the word-recognition step component. */
interface StepWordRecognitionProps {
  /**
   * Id of the OCR pipeline session that is interpolated into the request
   * and image URLs. While it is `null` the step only renders a placeholder.
   */
  sessionId: string | null

  /** Callback without arguments; presumably advances the wizard — confirm against caller. */
  onNext: () => void

  /** Callback receiving a step number; presumably jumps to that wizard step — confirm against caller. */
  goToStep: (step: number) => void

  /**
   * Skip _heal_row_gaps in cell grid (better overlay positioning).
   * When true, `skip_heal_gaps=true` is added to the words request.
   */
  skipHealGaps?: boolean
}
|
||||
|
||||
export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps = false }: StepWordRecognitionProps) {
|
||||
const [gridResult, setGridResult] = useState<GridResult | null>(null)
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [gtNotes, setGtNotes] = useState('')
|
||||
const [gtSaved, setGtSaved] = useState(false)
|
||||
|
||||
// Step-through labeling state
|
||||
const [activeIndex, setActiveIndex] = useState(0)
|
||||
const [editedEntries, setEditedEntries] = useState<WordEntry[]>([])
|
||||
const [editedCells, setEditedCells] = useState<GridCell[]>([])
|
||||
const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
|
||||
const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid' | 'paddle'>('auto')
|
||||
const [usedEngine, setUsedEngine] = useState<string>('')
|
||||
const [pronunciation, setPronunciation] = useState<'british' | 'american'>('british')
|
||||
const [gridMethod, setGridMethod] = useState<'v2' | 'words_first'>('v2')
|
||||
|
||||
// Streaming progress state
|
||||
const [streamProgress, setStreamProgress] = useState<{ current: number; total: number } | null>(null)
|
||||
|
||||
const enRef = useRef<HTMLInputElement>(null)
|
||||
const tableEndRef = useRef<HTMLDivElement>(null)
|
||||
|
||||
const isVocab = gridResult?.layout === 'vocab'
|
||||
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
// Always run fresh detection — word-lookup is fast (~0.03s)
|
||||
// and avoids stale cached results from previous pipeline versions.
|
||||
runAutoDetection()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
const applyGridResult = (data: GridResult) => {
|
||||
setGridResult(data)
|
||||
setUsedEngine(data.ocr_engine || '')
|
||||
if (data.layout === 'vocab' && data.entries) {
|
||||
initEntries(data.entries)
|
||||
}
|
||||
if (data.cells) {
|
||||
setEditedCells(data.cells.map(c => ({ ...c, status: c.status || 'pending' })))
|
||||
}
|
||||
}
|
||||
|
||||
const initEntries = (entries: WordEntry[]) => {
|
||||
setEditedEntries(entries.map(e => ({ ...e, status: e.status || 'pending' })))
|
||||
setActiveIndex(0)
|
||||
}
|
||||
|
||||
const runAutoDetection = useCallback(async (engine?: string) => {
|
||||
if (!sessionId) return
|
||||
const eng = engine || ocrEngine
|
||||
setDetecting(true)
|
||||
setError(null)
|
||||
setStreamProgress(null)
|
||||
setEditedCells([])
|
||||
setEditedEntries([])
|
||||
setGridResult(null)
|
||||
|
||||
try {
|
||||
// PP-OCRv5 forces words_first on the backend, so align frontend accordingly
|
||||
const effectiveGridMethod = eng === 'paddle' ? 'words_first' : gridMethod
|
||||
const useStream = effectiveGridMethod === 'v2'
|
||||
|
||||
// Retry once if initial request fails (e.g. after container restart,
|
||||
// session cache may not be warm yet when navigating via wizard)
|
||||
let res: Response | null = null
|
||||
for (let attempt = 0; attempt < 2; attempt++) {
|
||||
res = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=${useStream ? 'true' : 'false'}&engine=${eng}&pronunciation=${pronunciation}${skipHealGaps ? '&skip_heal_gaps=true' : ''}&grid_method=${effectiveGridMethod}`,
|
||||
{ method: 'POST' },
|
||||
)
|
||||
if (res.ok) break
|
||||
if (attempt === 0 && (res.status === 400 || res.status === 404)) {
|
||||
// Wait briefly for cache to warm up, then retry
|
||||
await new Promise(r => setTimeout(r, 2000))
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if (!res || !res.ok) {
|
||||
const err = await res?.json().catch(() => ({ detail: res?.statusText })) || { detail: 'Worterkennung fehlgeschlagen' }
|
||||
throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
|
||||
}
|
||||
|
||||
// words_first / pp-ocrv5 returns plain JSON (no streaming)
|
||||
if (!useStream) {
|
||||
const data = await res.json() as GridResult
|
||||
applyGridResult(data)
|
||||
return
|
||||
}
|
||||
|
||||
const reader = res.body!.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
let buffer = ''
|
||||
let streamLayout: string | null = null
|
||||
let streamColumnsUsed: GridResult['columns_used'] = []
|
||||
let streamGridShape: GridResult['grid_shape'] | null = null
|
||||
let streamCells: GridCell[] = []
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
|
||||
// Parse SSE events (separated by \n\n)
|
||||
while (buffer.includes('\n\n')) {
|
||||
const idx = buffer.indexOf('\n\n')
|
||||
const chunk = buffer.slice(0, idx).trim()
|
||||
buffer = buffer.slice(idx + 2)
|
||||
|
||||
if (!chunk.startsWith('data: ')) continue
|
||||
const dataStr = chunk.slice(6) // strip "data: "
|
||||
|
||||
let event: any
|
||||
try {
|
||||
event = JSON.parse(dataStr)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
if (event.type === 'meta') {
|
||||
streamLayout = event.layout || 'generic'
|
||||
streamGridShape = event.grid_shape || null
|
||||
// Show partial grid result so UI renders structure
|
||||
setGridResult(prev => ({
|
||||
...prev,
|
||||
layout: event.layout || 'generic',
|
||||
grid_shape: event.grid_shape,
|
||||
columns_used: [],
|
||||
cells: [],
|
||||
summary: { total_cells: event.grid_shape?.total_cells || 0, non_empty_cells: 0, low_confidence: 0 },
|
||||
duration_seconds: 0,
|
||||
ocr_engine: '',
|
||||
} as GridResult))
|
||||
}
|
||||
|
||||
if (event.type === 'columns') {
|
||||
streamColumnsUsed = event.columns_used || []
|
||||
setGridResult(prev => prev ? { ...prev, columns_used: streamColumnsUsed } : prev)
|
||||
}
|
||||
|
||||
if (event.type === 'cell') {
|
||||
const cell: GridCell = { ...event.cell, status: 'pending' }
|
||||
streamCells = [...streamCells, cell]
|
||||
setEditedCells(streamCells)
|
||||
setStreamProgress(event.progress)
|
||||
// Auto-scroll table to bottom
|
||||
setTimeout(() => tableEndRef.current?.scrollIntoView({ behavior: 'smooth', block: 'nearest' }), 16)
|
||||
}
|
||||
|
||||
if (event.type === 'complete') {
|
||||
// Build final GridResult
|
||||
const finalResult: GridResult = {
|
||||
cells: streamCells,
|
||||
grid_shape: streamGridShape || { rows: 0, cols: 0, total_cells: streamCells.length },
|
||||
columns_used: streamColumnsUsed,
|
||||
layout: streamLayout || 'generic',
|
||||
image_width: 0,
|
||||
image_height: 0,
|
||||
duration_seconds: event.duration_seconds || 0,
|
||||
ocr_engine: event.ocr_engine || '',
|
||||
summary: event.summary || {},
|
||||
}
|
||||
|
||||
// If vocab: apply post-processed entries from complete event
|
||||
if (event.vocab_entries) {
|
||||
finalResult.entries = event.vocab_entries
|
||||
finalResult.vocab_entries = event.vocab_entries
|
||||
finalResult.entry_count = event.vocab_entries.length
|
||||
}
|
||||
|
||||
applyGridResult(finalResult)
|
||||
setUsedEngine(event.ocr_engine || '')
|
||||
setStreamProgress(null)
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setDetecting(false)
|
||||
}
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId, ocrEngine, pronunciation, gridMethod])
|
||||
|
||||
const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
|
||||
if (!sessionId) return
|
||||
const gt: WordGroundTruth = {
|
||||
is_correct: isCorrect,
|
||||
corrected_entries: isCorrect ? undefined : (isVocab ? editedEntries : undefined),
|
||||
notes: gtNotes || undefined,
|
||||
}
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/words`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(gt),
|
||||
})
|
||||
setGtSaved(true)
|
||||
} catch (e) {
|
||||
console.error('Ground truth save failed:', e)
|
||||
}
|
||||
}, [sessionId, gtNotes, editedEntries, isVocab])
|
||||
|
||||
// Vocab mode: update entry field
|
||||
const updateEntry = (index: number, field: 'english' | 'german' | 'example', value: string) => {
|
||||
setEditedEntries(prev => prev.map((e, i) =>
|
||||
i === index ? { ...e, [field]: value, status: 'edited' as const } : e
|
||||
))
|
||||
}
|
||||
|
||||
// Generic mode: update cell text
|
||||
const updateCell = (cellId: string, value: string) => {
|
||||
setEditedCells(prev => prev.map(c =>
|
||||
c.cell_id === cellId ? { ...c, text: value, status: 'edited' as const } : c
|
||||
))
|
||||
}
|
||||
|
||||
// Step-through: confirm current row (always cell-based)
|
||||
const confirmEntry = () => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
const cellIds = new Set(rowCells.map(c => c.cell_id))
|
||||
setEditedCells(prev => prev.map(c =>
|
||||
cellIds.has(c.cell_id) ? { ...c, status: c.status === 'edited' ? 'edited' : 'confirmed' } : c
|
||||
))
|
||||
const maxIdx = getUniqueRowCount() - 1
|
||||
if (activeIndex < maxIdx) {
|
||||
setActiveIndex(activeIndex + 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Step-through: skip current row
|
||||
const skipEntry = () => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
const cellIds = new Set(rowCells.map(c => c.cell_id))
|
||||
setEditedCells(prev => prev.map(c =>
|
||||
cellIds.has(c.cell_id) ? { ...c, status: 'skipped' as const } : c
|
||||
))
|
||||
const maxIdx = getUniqueRowCount() - 1
|
||||
if (activeIndex < maxIdx) {
|
||||
setActiveIndex(activeIndex + 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Helper: get unique row indices from cells
|
||||
const getUniqueRowCount = () => {
|
||||
if (!editedCells.length) return 0
|
||||
return new Set(editedCells.map(c => c.row_index)).size
|
||||
}
|
||||
|
||||
// Helper: get cells for a given row index (by position in sorted unique rows)
|
||||
const getRowCells = (rowPosition: number) => {
|
||||
const uniqueRows = [...new Set(editedCells.map(c => c.row_index))].sort((a, b) => a - b)
|
||||
const rowIdx = uniqueRows[rowPosition]
|
||||
return editedCells.filter(c => c.row_index === rowIdx)
|
||||
}
|
||||
|
||||
// Focus english input when active entry changes in labeling mode
|
||||
useEffect(() => {
|
||||
if (mode === 'labeling' && enRef.current) {
|
||||
enRef.current.focus()
|
||||
}
|
||||
}, [activeIndex, mode])
|
||||
|
||||
// Keyboard shortcuts in labeling mode
|
||||
useEffect(() => {
|
||||
if (mode !== 'labeling') return
|
||||
const handler = (e: KeyboardEvent) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault()
|
||||
confirmEntry()
|
||||
} else if (e.key === 'ArrowDown' && e.ctrlKey) {
|
||||
e.preventDefault()
|
||||
skipEntry()
|
||||
} else if (e.key === 'ArrowUp' && e.ctrlKey) {
|
||||
e.preventDefault()
|
||||
if (activeIndex > 0) setActiveIndex(activeIndex - 1)
|
||||
}
|
||||
}
|
||||
window.addEventListener('keydown', handler)
|
||||
return () => window.removeEventListener('keydown', handler)
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [mode, activeIndex, editedEntries, editedCells])
|
||||
|
||||
if (!sessionId) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">🔤</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 5: Worterkennung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Bitte zuerst Schritte 1-4 abschliessen.
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
|
||||
const confColor = (conf: number) => {
|
||||
if (conf >= 70) return 'text-green-600 dark:text-green-400'
|
||||
if (conf >= 50) return 'text-yellow-600 dark:text-yellow-400'
|
||||
return 'text-red-600 dark:text-red-400'
|
||||
}
|
||||
|
||||
const statusBadge = (status?: string) => {
|
||||
const map: Record<string, string> = {
|
||||
pending: 'bg-gray-100 dark:bg-gray-700 text-gray-500',
|
||||
confirmed: 'bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-400',
|
||||
edited: 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-400',
|
||||
skipped: 'bg-orange-100 dark:bg-orange-900/30 text-orange-700 dark:text-orange-400',
|
||||
}
|
||||
return map[status || 'pending'] || map.pending
|
||||
}
|
||||
|
||||
const summary = gridResult?.summary
|
||||
const columnsUsed = gridResult?.columns_used || []
|
||||
const gridShape = gridResult?.grid_shape
|
||||
|
||||
// Counts for labeling progress (always cell-based)
|
||||
const confirmedRowIds = new Set(
|
||||
editedCells.filter(c => c.status === 'confirmed' || c.status === 'edited').map(c => c.row_index)
|
||||
)
|
||||
const confirmedCount = confirmedRowIds.size
|
||||
const totalCount = getUniqueRowCount()
|
||||
|
||||
// Group cells by row for generic table display
|
||||
const cellsByRow: Map<number, GridCell[]> = new Map()
|
||||
for (const cell of editedCells) {
|
||||
const existing = cellsByRow.get(cell.row_index) || []
|
||||
existing.push(cell)
|
||||
cellsByRow.set(cell.row_index, existing)
|
||||
}
|
||||
const sortedRowIndices = [...cellsByRow.keys()].sort((a, b) => a - b)
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Loading with streaming progress */}
|
||||
{detecting && (
|
||||
<div className="space-y-1">
|
||||
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||
{streamProgress
|
||||
? `Zelle ${streamProgress.current}/${streamProgress.total} erkannt...`
|
||||
: 'Worterkennung startet...'}
|
||||
</div>
|
||||
{streamProgress && streamProgress.total > 0 && (
|
||||
<div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-1.5">
|
||||
<div
|
||||
className="bg-teal-500 h-1.5 rounded-full transition-all duration-150"
|
||||
style={{ width: `${(streamProgress.current / streamProgress.total) * 100}%` }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Layout badge + Mode toggle */}
|
||||
{gridResult && (
|
||||
<div className="flex items-center gap-2">
|
||||
{/* Layout badge */}
|
||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
|
||||
isVocab
|
||||
? 'bg-indigo-100 dark:bg-indigo-900/30 text-indigo-700 dark:text-indigo-300'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}>
|
||||
{isVocab ? 'Vokabel-Layout' : 'Generisch'}
|
||||
</span>
|
||||
|
||||
{gridShape && (
|
||||
<span className="text-[10px] text-gray-400">
|
||||
{gridShape.rows}×{gridShape.cols} = {gridShape.total_cells} Zellen
|
||||
</span>
|
||||
)}
|
||||
|
||||
<div className="flex-1" />
|
||||
|
||||
<button
|
||||
onClick={() => setMode('overview')}
|
||||
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||
mode === 'overview'
|
||||
? 'bg-teal-600 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||
}`}
|
||||
>
|
||||
Uebersicht
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setMode('labeling')}
|
||||
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||
mode === 'labeling'
|
||||
? 'bg-teal-600 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||
}`}
|
||||
>
|
||||
Labeling ({confirmedCount}/{totalCount})
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Overview mode */}
|
||||
{mode === 'overview' && (
|
||||
<>
|
||||
{/* Images: overlay vs clean */}
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Mit Grid-Overlay
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{gridResult ? (
|
||||
// eslint-disable-next-line @next/next/no-img-element
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Wort-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
) : (
|
||||
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||
{detecting ? 'Erkenne Woerter...' : 'Keine Daten'}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Entzerrtes Bild
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Entzerrt"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Result summary (only after streaming completes) */}
|
||||
{gridResult && summary && !detecting && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Ergebnis: {summary.non_empty_cells}/{summary.total_cells} Zellen mit Text
|
||||
({sortedRowIndices.length} Zeilen, {columnsUsed.length} Spalten)
|
||||
</h4>
|
||||
<span className="text-xs text-gray-400">
|
||||
{gridResult.duration_seconds}s
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Summary badges */}
|
||||
<div className="flex gap-2 flex-wrap">
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
||||
Zellen: {summary.non_empty_cells}/{summary.total_cells}
|
||||
</span>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<span key={i} className={`px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 ${colTypeColor(col.type)}`}>
|
||||
C{col.index}: {colTypeLabel(col.type)}
|
||||
</span>
|
||||
))}
|
||||
{summary.low_confidence > 0 && (
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
||||
Unsicher: {summary.low_confidence}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Entry/Cell table */}
|
||||
<div className="max-h-80 overflow-y-auto">
|
||||
{/* Unified dynamic table — columns driven by columns_used */}
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-12">Zeile</th>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</th>
|
||||
))}
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{sortedRowIndices.map((rowIdx, posIdx) => {
|
||||
const rowCells = cellsByRow.get(rowIdx) || []
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<tr
|
||||
key={rowIdx}
|
||||
className={`border-b dark:border-gray-700/50 ${
|
||||
posIdx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||
}`}
|
||||
onClick={() => { setActiveIndex(posIdx); setMode('labeling') }}
|
||||
>
|
||||
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
|
||||
R{String(rowIdx).padStart(2, '0')}
|
||||
</td>
|
||||
{columnsUsed.map((col) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
return (
|
||||
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
<MultilineText text={cell?.text || ''} />
|
||||
</td>
|
||||
)
|
||||
})}
|
||||
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}%
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
<div ref={tableEndRef} />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Streaming cell table (shown while detecting, before complete) */}
|
||||
{detecting && editedCells.length > 0 && !gridResult?.summary?.non_empty_cells && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Live: {editedCells.length} Zellen erkannt...
|
||||
</h4>
|
||||
<div className="max-h-80 overflow-y-auto">
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-12">Zelle</th>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</th>
|
||||
))}
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{(() => {
|
||||
const liveByRow: Map<number, GridCell[]> = new Map()
|
||||
for (const cell of editedCells) {
|
||||
const existing = liveByRow.get(cell.row_index) || []
|
||||
existing.push(cell)
|
||||
liveByRow.set(cell.row_index, existing)
|
||||
}
|
||||
const liveSorted = [...liveByRow.keys()].sort((a, b) => a - b)
|
||||
return liveSorted.map(rowIdx => {
|
||||
const rowCells = liveByRow.get(rowIdx) || []
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<tr key={rowIdx} className="border-b dark:border-gray-700/50 animate-fade-in">
|
||||
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
|
||||
R{String(rowIdx).padStart(2, '0')}
|
||||
</td>
|
||||
{columnsUsed.map((col) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
return (
|
||||
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300">
|
||||
<MultilineText text={cell?.text || ''} />
|
||||
</td>
|
||||
)
|
||||
})}
|
||||
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}%
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
})
|
||||
})()}
|
||||
</tbody>
|
||||
</table>
|
||||
<div ref={tableEndRef} />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Labeling mode */}
|
||||
{mode === 'labeling' && editedCells.length > 0 && (
|
||||
<div className="grid grid-cols-3 gap-4">
|
||||
{/* Left 2/3: Image with highlighted active row */}
|
||||
<div className="col-span-2">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Zeile {activeIndex + 1} von {getUniqueRowCount()}
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Wort-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
{/* Highlight overlay for active row */}
|
||||
{(() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
return rowCells.map(cell => (
|
||||
<div
|
||||
key={cell.cell_id}
|
||||
className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
|
||||
style={{
|
||||
left: `${cell.bbox_pct.x}%`,
|
||||
top: `${cell.bbox_pct.y}%`,
|
||||
width: `${cell.bbox_pct.w}%`,
|
||||
height: `${cell.bbox_pct.h}%`,
|
||||
}}
|
||||
/>
|
||||
))
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right 1/3: Editable fields */}
|
||||
<div className="space-y-3">
|
||||
{/* Navigation */}
|
||||
<div className="flex items-center justify-between">
|
||||
<button
|
||||
onClick={() => setActiveIndex(Math.max(0, activeIndex - 1))}
|
||||
disabled={activeIndex === 0}
|
||||
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||
>
|
||||
Zurueck
|
||||
</button>
|
||||
<span className="text-xs text-gray-500">
|
||||
{activeIndex + 1} / {getUniqueRowCount()}
|
||||
</span>
|
||||
<button
|
||||
onClick={() => setActiveIndex(Math.min(
|
||||
getUniqueRowCount() - 1,
|
||||
activeIndex + 1
|
||||
))}
|
||||
disabled={activeIndex >= getUniqueRowCount() - 1}
|
||||
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||
>
|
||||
Weiter
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Status badge */}
|
||||
<div className="flex items-center gap-2">
|
||||
{(() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<span className={`text-xs font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}% Konfidenz
|
||||
</span>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
|
||||
{/* Editable fields — one per column, driven by columns_used */}
|
||||
<div className="space-y-2">
|
||||
{(() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
return columnsUsed.map((col, colIdx) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
if (!cell) return null
|
||||
return (
|
||||
<div key={col.index}>
|
||||
<div className="flex items-center gap-1 mb-0.5">
|
||||
<label className={`text-[10px] font-medium ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</label>
|
||||
<span className="text-[9px] text-gray-400">{cell.cell_id}</span>
|
||||
</div>
|
||||
{/* Cell crop */}
|
||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative mb-1">
|
||||
<CellCrop imageUrl={dewarpedUrl} bbox={cell.bbox_pct} />
|
||||
</div>
|
||||
<textarea
|
||||
ref={colIdx === 0 ? enRef as any : undefined}
|
||||
rows={Math.max(1, (cell.text || '').split('\n').length)}
|
||||
value={cell.text || ''}
|
||||
onChange={(e) => updateCell(cell.cell_id, e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
})
|
||||
})()}
|
||||
</div>
|
||||
|
||||
{/* Action buttons */}
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={confirmEntry}
|
||||
className="flex-1 px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700 font-medium"
|
||||
>
|
||||
Bestaetigen (Enter)
|
||||
</button>
|
||||
<button
|
||||
onClick={skipEntry}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
Skip
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Shortcuts hint */}
|
||||
<div className="text-[10px] text-gray-400 space-y-0.5">
|
||||
<div>Enter = Bestaetigen & weiter</div>
|
||||
<div>Ctrl+Down = Ueberspringen</div>
|
||||
<div>Ctrl+Up = Zurueck</div>
|
||||
</div>
|
||||
|
||||
{/* Row list (compact) */}
|
||||
<div className="border-t dark:border-gray-700 pt-2 mt-2">
|
||||
<div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Alle Zeilen
|
||||
</div>
|
||||
<div className="max-h-48 overflow-y-auto space-y-0.5">
|
||||
{sortedRowIndices.map((rowIdx, posIdx) => {
|
||||
const rowCells = cellsByRow.get(rowIdx) || []
|
||||
const textParts = rowCells.filter(c => c.text).map(c => c.text.replace(/\n/g, ' '))
|
||||
return (
|
||||
<div
|
||||
key={rowIdx}
|
||||
onClick={() => setActiveIndex(posIdx)}
|
||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
||||
posIdx === activeIndex
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
<span className="w-6 text-right text-gray-400 font-mono">R{String(rowIdx).padStart(2, '0')}</span>
|
||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||
{textParts.join(' \u2192 ') || '\u2014'}
|
||||
</span>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Controls */}
|
||||
{gridResult && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center gap-3 flex-wrap">
|
||||
{/* Grid method selector */}
|
||||
<select
|
||||
value={gridMethod}
|
||||
onChange={(e) => setGridMethod(e.target.value as 'v2' | 'words_first')}
|
||||
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
<option value="v2">Standard (v2)</option>
|
||||
<option value="words_first">Words-First</option>
|
||||
</select>
|
||||
|
||||
{/* OCR Engine selector */}
|
||||
<select
|
||||
value={ocrEngine}
|
||||
onChange={(e) => setOcrEngine(e.target.value as 'auto' | 'tesseract' | 'rapid' | 'paddle')}
|
||||
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
<option value="auto">Auto (RapidOCR wenn verfuegbar)</option>
|
||||
<option value="rapid">RapidOCR (ONNX)</option>
|
||||
<option value="tesseract">Tesseract</option>
|
||||
<option value="paddle">PP-OCRv5 (lokal)</option>
|
||||
</select>
|
||||
|
||||
{/* Pronunciation selector (only for vocab) */}
|
||||
{isVocab && (
|
||||
<select
|
||||
value={pronunciation}
|
||||
onChange={(e) => setPronunciation(e.target.value as 'british' | 'american')}
|
||||
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
<option value="british">Britisch (RP)</option>
|
||||
<option value="american">Amerikanisch</option>
|
||||
</select>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={() => runAutoDetection()}
|
||||
disabled={detecting}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-50"
|
||||
>
|
||||
Erneut erkennen
|
||||
</button>
|
||||
|
||||
{/* Show which engine was used */}
|
||||
{usedEngine && (
|
||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
|
||||
usedEngine === 'rapid' || usedEngine === 'paddle'
|
||||
? 'bg-purple-100 dark:bg-purple-900/30 text-purple-700 dark:text-purple-300'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}>
|
||||
{usedEngine === 'paddle' ? 'pp-ocrv5' : usedEngine}
|
||||
</span>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={() => goToStep(3)}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 text-orange-600 dark:text-orange-400 border-orange-300 dark:border-orange-700"
|
||||
>
|
||||
Zeilen korrigieren (Step 4)
|
||||
</button>
|
||||
|
||||
<div className="flex-1" />
|
||||
|
||||
{/* Ground truth */}
|
||||
{!gtSaved ? (
|
||||
<>
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Notizen (optional)"
|
||||
value={gtNotes}
|
||||
onChange={(e) => setGtNotes(e.target.value)}
|
||||
className="px-2 py-1 text-xs border rounded dark:bg-gray-700 dark:border-gray-600 w-48"
|
||||
/>
|
||||
<button
|
||||
onClick={() => handleGroundTruth(true)}
|
||||
className="px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700"
|
||||
>
|
||||
Korrekt
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleGroundTruth(false)}
|
||||
className="px-3 py-1.5 text-xs bg-red-600 text-white rounded-lg hover:bg-red-700"
|
||||
>
|
||||
Fehlerhaft
|
||||
</button>
|
||||
</>
|
||||
) : (
|
||||
<span className="text-xs text-green-600 dark:text-green-400">
|
||||
Ground Truth gespeichert
|
||||
</span>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium"
|
||||
>
|
||||
Weiter
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
/**
 * CellCrop: shows a zoomed-in crop of the dewarped image for a single cell.
 *
 * `bbox` is expressed in percent of the full image (x/y = top-left corner,
 * w/h = size). The image is rendered `scale` times as wide as the container
 * and then shifted by `-bbox.x%` / `-bbox.y%` of ITS OWN rendered size via
 * `transform: translate(...)`, so the cell's top-left corner lands on the
 * container's top-left corner independent of the image's aspect ratio.
 *
 * A CSS background is deliberately not used: `background-position`
 * percentages are resolved against (container size - image size) rather than
 * the image size, so negative percentages move a zoomed image the wrong way.
 *
 * @param imageUrl URL of the image to crop from
 * @param bbox     cell bounding box in percent of the image
 */
function CellCrop({ imageUrl, bbox }: { imageUrl: string; bbox: { x: number; y: number; w: number; h: number } }) {
  // Zoom factor derived from the cell size, capped at 8x.
  const rawScale = Math.min(100 / bbox.w, 100 / bbox.h, 8)
  // Degenerate boxes (negative or NaN size) fall back to the unzoomed image.
  const scale = Number.isFinite(rawScale) && rawScale > 0 ? rawScale : 1

  return (
    <div className="w-full h-full relative overflow-hidden">
      {/* eslint-disable-next-line @next/next/no-img-element */}
      <img
        src={imageUrl}
        alt=""
        aria-hidden="true"
        draggable={false}
        className="absolute top-0 left-0 h-auto pointer-events-none select-none"
        style={{
          width: `${scale * 100}%`,
          // Override any global `img { max-width: 100% }` reset so the zoom applies.
          maxWidth: 'none',
          // Percentages in translate() refer to the image's own box.
          transform: `translate(${-bbox.x}%, ${-bbox.y}%)`,
        }}
      />
    </div>
  )
}
|
||||
@@ -1,176 +0,0 @@
|
||||
/**
|
||||
* Tests for useSlideWordPositions hook.
|
||||
*
|
||||
* The hook computes word positions from OCR word_boxes or pixel projection.
|
||||
* Since Canvas/Image are not available in jsdom, we test the pure computation
|
||||
* logic by extracting and verifying the WordPosition interface contract.
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WordPosition interface (mirrored from useSlideWordPositions.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Position of one word as produced by the helper functions in this test file. */
interface WordPosition {
  /** Left edge in percent (word-box path: box.left / image width * 100). */
  xPct: number
  /** Top edge in percent (word-box path: box.top / image height * 100). */
  yPct: number
  /** Width in percent (word-box path: box.width / image width * 100). */
  wPct: number
  /** Height in percent (word-box path: box.height / image height * 100). */
  hPct: number
  /** The word / token text. */
  text: string
  /** Font size ratio; both helpers in this file always set it to 1.0. */
  fontRatio: number
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pure computation functions extracted from the hook for testing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Word-box path: convert one OCR word box into a WordPosition whose
 * coordinates are percentages of the image dimensions.
 * Mirrors the word_boxes.map() logic in useSlideWordPositions.
 *
 * @param box  word box with its text and left/top/width/height
 * @param imgW image width, same unit as box.left / box.width
 * @param imgH image height, same unit as box.top / box.height
 * @returns x/w relative to imgW, y/h relative to imgH; fontRatio is always 1.0
 */
function wordBoxToPosition(
  box: { text: string; left: number; top: number; width: number; height: number },
  imgW: number,
  imgH: number,
): WordPosition {
  const { text, left, top, width, height } = box
  // Share of `total` taken by `value`, expressed in percent.
  const pctOf = (value: number, total: number): number => (value / total) * 100

  const xPct = pctOf(left, imgW)
  const wPct = pctOf(width, imgW)
  const yPct = pctOf(top, imgH)
  const hPct = pctOf(height, imgH)

  return { xPct, wPct, yPct, hPct, text, fontRatio: 1.0 }
}
|
||||
|
||||
/**
 * Fallback path (no word_boxes): give every token an equally wide slot
 * inside the cell bbox, laid out left to right.
 * Mirrors the fallback logic in useSlideWordPositions.
 *
 * @param tokens  words of the cell, in reading order
 * @param bboxPct cell bounding box (x, y, w, h)
 * @returns one WordPosition per token; y/h are taken from the cell bbox and
 *          fontRatio is always 1.0. An empty token list yields an empty array.
 */
function fallbackPositions(
  tokens: string[],
  bboxPct: { x: number; y: number; w: number; h: number },
): WordPosition[] {
  const { x, y, w, h } = bboxPct
  const slotWidth = w / tokens.length

  // Build the position for the token occupying slot number `slot`.
  const toPosition = (token: string, slot: number): WordPosition => {
    return {
      xPct: x + slot * slotWidth,
      wPct: slotWidth,
      yPct: y,
      hPct: h,
      text: token,
      fontRatio: 1.0,
    }
  }

  return tokens.map((token, slot) => toPosition(token, slot))
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Suite for the word-box path: box coordinates are converted to percentages.
describe('wordBoxToPosition (word-box path)', () => {
  it('should compute percentage positions from pixel coordinates', () => {
    const { xPct, wPct, yPct, hPct, text, fontRatio } = wordBoxToPosition(
      { text: 'hello', left: 100, top: 200, width: 80, height: 20 },
      1000,
      2000,
    )

    // Each expectation is box value / image dimension * 100.
    expect(xPct).toBeCloseTo(10, 1)
    expect(wPct).toBeCloseTo(8, 1)
    expect(yPct).toBeCloseTo(10, 1)
    expect(hPct).toBeCloseTo(1, 1)
    expect(text).toBe('hello')
    expect(fontRatio).toBe(1.0)
  })

  it('should produce different yPct for words on different lines', () => {
    const imgW = 1000
    const imgH = 2000
    // Same horizontal placement, second word 30 units further down.
    const upper = wordBoxToPosition({ text: 'line1', left: 50, top: 100, width: 60, height: 20 }, imgW, imgH)
    const lower = wordBoxToPosition({ text: 'line2', left: 50, top: 130, width: 60, height: 20 }, imgW, imgH)

    expect(upper.yPct).not.toEqual(lower.yPct)
    expect(lower.yPct).toBeGreaterThan(upper.yPct)
  })

  it('should handle word at origin', () => {
    const originWord = { text: 'a', left: 0, top: 0, width: 50, height: 25 }
    const wp = wordBoxToPosition(originWord, 500, 500)

    expect(wp.xPct).toBe(0)
    expect(wp.yPct).toBe(0)
    expect(wp.wPct).toBeCloseTo(10, 1)
    expect(wp.hPct).toBeCloseTo(5, 1)
  })

  it('should handle word at bottom-right corner', () => {
    const cornerWord = { text: 'z', left: 900, top: 1900, width: 100, height: 100 }
    const wp = wordBoxToPosition(cornerWord, 1000, 2000)

    expect(wp.xPct).toBe(90)
    expect(wp.yPct).toBe(95)
    expect(wp.wPct).toBe(10)
    expect(wp.hPct).toBe(5)
  })
})
|
||||
|
||||
|
||||
// Suite for the fallback path: tokens share the cell bbox in equal-width slots.
describe('fallbackPositions (no word_boxes)', () => {
  it('should spread tokens evenly across cell width', () => {
    const [first, second, ...rest] = fallbackPositions(['apple', 'Apfel'], { x: 10, y: 20, w: 60, h: 5 })

    // Exactly two positions: nothing left over after the first two.
    expect(rest.length + 2).toBe(2)
    expect(first.xPct).toBeCloseTo(10, 1)
    expect(second.xPct).toBeCloseTo(40, 1) // bbox.x + one slot of width 30
    expect(first.wPct).toBeCloseTo(30, 1)
    expect(second.wPct).toBeCloseTo(30, 1)
  })

  it('should use cell bbox for Y position (all words same Y)', () => {
    const positions = fallbackPositions(['a', 'b', 'c'], { x: 5, y: 30, w: 80, h: 4 })

    positions.forEach((wp) => {
      expect(wp.yPct).toBe(30)
      expect(wp.hPct).toBe(4)
    })
  })

  it('should handle single token', () => {
    const positions = fallbackPositions(['word'], { x: 15, y: 25, w: 50, h: 6 })

    expect(positions.length).toBe(1)
    const [only] = positions
    // A single token takes over the whole cell bbox.
    expect(only.xPct).toBe(15)
    expect(only.wPct).toBe(50)
    expect(only.yPct).toBe(25)
    expect(only.hPct).toBe(6)
  })
})
|
||||
|
||||
|
||||
// Contract suite: where yPct / hPct come from in each of the two paths.
describe('WordPosition yPct/hPct contract', () => {
  it('word-box path: yPct comes from box.top, not cell bbox', () => {
    // Key fix under test: words of a multi-line cell must NOT be stacked at the cell position.
    const imgW = 1000
    const imgH = 1000
    // The enclosing cell spans y=20% to y=30%.
    const cellBbox = { x: 10, y: 20, w: 60, h: 10 }

    // Two words of the same cell, sitting on different lines.
    const firstLine = wordBoxToPosition({ text: 'line1', left: 100, top: 200, width: 80, height: 20 }, imgW, imgH)
    const secondLine = wordBoxToPosition({ text: 'line2', left: 100, top: 260, width: 80, height: 20 }, imgW, imgH)

    // Expected: y=20% and y=26% respectively, not both at cellBbox.y (20%).
    expect(firstLine.yPct).toBeCloseTo(20, 1)
    expect(secondLine.yPct).toBeCloseTo(26, 1)
    expect(firstLine.yPct).not.toEqual(secondLine.yPct)

    // Heights come from each word box (2%), not from the cell (10%).
    expect(firstLine.hPct).toBeCloseTo(2, 1)
    expect(secondLine.hPct).toBeCloseTo(2, 1)
    expect(firstLine.hPct).toBeLessThan(cellBbox.h)
  })

  it('fallback path: yPct equals cell bbox.y (no per-word data)', () => {
    const bbox = { x: 10, y: 45, w: 30, h: 8 }
    const [left, right] = fallbackPositions(['a', 'b'], bbox)

    // Without word_boxes every word inherits the cell's Y and height; this is expected.
    expect(left.yPct).toBe(bbox.y)
    expect(right.yPct).toBe(bbox.y)
    expect(left.hPct).toBe(bbox.h)
    expect(right.hPct).toBe(bbox.h)
  })
})
|
||||
@@ -1,198 +0,0 @@
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Horizontal placement and font sizing of one word group inside a grid cell. */
export interface WordPosition {
  /** Left edge of the word group, in the same percent units as the cell's bbox_pct.x. */
  xPct: number
  /** Width of the word group, in the same percent units as the cell's bbox_pct.w. */
  wPct: number
  /** Text of the word group (trimmed). */
  text: string
  /**
   * Estimated font size relative to the cell's pixel height, capped at 1.0
   * when first computed; a later normalisation step may overwrite it.
   */
  fontRatio: number
}
|
||||
|
||||
/**
|
||||
* Shared hook: analyse dark-pixel clusters on an image to determine
|
||||
* the exact horizontal position & auto-font-size of word groups in each cell.
|
||||
*
|
||||
* When rotation=180, the image is rotated 180° before pixel analysis.
|
||||
* Cell coordinates are transformed to the rotated space for reading,
|
||||
* and cluster positions are mirrored back to the original coordinate system.
|
||||
*
|
||||
* Returns a Map<cell_id, WordPosition[]>.
|
||||
*/
|
||||
export function usePixelWordPositions(
|
||||
imageUrl: string,
|
||||
cells: GridCell[],
|
||||
active: boolean,
|
||||
rotation: 0 | 180 = 0,
|
||||
): Map<string, WordPosition[]> {
|
||||
const [cellWordPositions, setCellWordPositions] = useState<Map<string, WordPosition[]>>(new Map())
|
||||
|
||||
useEffect(() => {
|
||||
if (!active || cells.length === 0 || !imageUrl) return
|
||||
|
||||
const img = new Image()
|
||||
img.crossOrigin = 'anonymous'
|
||||
img.onload = () => {
|
||||
const imgW = img.naturalWidth
|
||||
const imgH = img.naturalHeight
|
||||
|
||||
const canvas = document.createElement('canvas')
|
||||
canvas.width = imgW
|
||||
canvas.height = imgH
|
||||
const ctx = canvas.getContext('2d')
|
||||
if (!ctx) return
|
||||
|
||||
if (rotation === 180) {
|
||||
// Draw image rotated 180°
|
||||
ctx.translate(imgW, imgH)
|
||||
ctx.rotate(Math.PI)
|
||||
ctx.drawImage(img, 0, 0)
|
||||
ctx.setTransform(1, 0, 0, 1, 0, 0) // reset transform for measureText
|
||||
} else {
|
||||
ctx.drawImage(img, 0, 0)
|
||||
}
|
||||
|
||||
const refFontSize = 40
|
||||
const fontFam = "'Liberation Sans', Arial, sans-serif"
|
||||
ctx.font = `${refFontSize}px ${fontFam}`
|
||||
|
||||
const positions = new Map<string, WordPosition[]>()
|
||||
|
||||
for (const cell of cells) {
|
||||
if (!cell.bbox_pct || !cell.text) continue
|
||||
|
||||
// Split by 3+ whitespace into word-groups
|
||||
const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)
|
||||
|
||||
// Cell pixel region — when rotated 180°, transform coordinates
|
||||
let cx: number, cy: number
|
||||
const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
|
||||
const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
|
||||
|
||||
if (rotation === 180) {
|
||||
// In rotated image: (x,y) maps to (W-x-w, H-y-h)
|
||||
cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
|
||||
cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
|
||||
} else {
|
||||
cx = Math.round(cell.bbox_pct.x / 100 * imgW)
|
||||
cy = Math.round(cell.bbox_pct.y / 100 * imgH)
|
||||
}
|
||||
if (cw <= 0 || ch <= 0) continue
|
||||
// Clamp to image bounds
|
||||
if (cx < 0) cx = 0
|
||||
if (cy < 0) cy = 0
|
||||
if (cx + cw > imgW || cy + ch > imgH) continue
|
||||
|
||||
const imageData = ctx.getImageData(cx, cy, cw, ch)
|
||||
|
||||
// Vertical projection: count dark pixels per column
|
||||
const proj = new Float32Array(cw)
|
||||
for (let y = 0; y < ch; y++) {
|
||||
for (let x = 0; x < cw; x++) {
|
||||
const idx = (y * cw + x) * 4
|
||||
const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
|
||||
if (lum < 128) proj[x]++
|
||||
}
|
||||
}
|
||||
|
||||
// Find dark-pixel clusters (word groups on the image)
|
||||
const threshold = Math.max(1, ch * 0.03)
|
||||
const minGap = Math.max(5, Math.round(cw * 0.02))
|
||||
let clusters: { start: number; end: number }[] = []
|
||||
let inCluster = false
|
||||
let clStart = 0
|
||||
let gap = 0
|
||||
|
||||
for (let x = 0; x < cw; x++) {
|
||||
if (proj[x] >= threshold) {
|
||||
if (!inCluster) { clStart = x; inCluster = true }
|
||||
gap = 0
|
||||
} else if (inCluster) {
|
||||
gap++
|
||||
if (gap > minGap) {
|
||||
clusters.push({ start: clStart, end: x - gap })
|
||||
inCluster = false
|
||||
gap = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })
|
||||
|
||||
if (clusters.length === 0) continue
|
||||
|
||||
// When rotated 180°, mirror clusters back to original coordinate system
|
||||
// A cluster at (start, end) in rotated space = (cw-1-end, cw-1-start) in original
|
||||
if (rotation === 180) {
|
||||
clusters = clusters.map(c => ({
|
||||
start: cw - 1 - c.end,
|
||||
end: cw - 1 - c.start,
|
||||
})).reverse() // reverse to restore left-to-right order in original space
|
||||
}
|
||||
|
||||
const wordPos: WordPosition[] = []
|
||||
|
||||
if (groups.length <= 1) {
|
||||
// Single group: position at first cluster, merge all clusters for width
|
||||
const firstCl = clusters[0]
|
||||
const lastCl = clusters[clusters.length - 1]
|
||||
const clusterW = lastCl.end - firstCl.start + 1
|
||||
const measured = ctx.measureText(cell.text.trim())
|
||||
const autoFontPx = refFontSize * (clusterW / measured.width)
|
||||
const fontRatio = Math.min(autoFontPx / ch, 1.0)
|
||||
wordPos.push({
|
||||
xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w,
|
||||
wPct: ((lastCl.end - firstCl.start + 1) / cw) * cell.bbox_pct.w,
|
||||
text: cell.text.trim(),
|
||||
fontRatio,
|
||||
})
|
||||
} else if (clusters.length >= groups.length) {
|
||||
// Multiple groups: match to clusters left-to-right
|
||||
for (let i = 0; i < groups.length; i++) {
|
||||
const cl = clusters[i]
|
||||
const clusterW = cl.end - cl.start + 1
|
||||
const measured = ctx.measureText(groups[i])
|
||||
const autoFontPx = refFontSize * (clusterW / measured.width)
|
||||
const fontRatio = Math.min(autoFontPx / ch, 1.0)
|
||||
wordPos.push({
|
||||
xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
|
||||
wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
|
||||
text: groups[i],
|
||||
fontRatio,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
continue // fewer clusters than groups — skip
|
||||
}
|
||||
|
||||
positions.set(cell.cell_id, wordPos)
|
||||
}
|
||||
|
||||
// Normalise: find the most common fontRatio (mode) and apply it to all
|
||||
const allRatios: number[] = []
|
||||
for (const wps of positions.values()) {
|
||||
for (const wp of wps) allRatios.push(wp.fontRatio)
|
||||
}
|
||||
if (allRatios.length > 0) {
|
||||
// Bucket ratios to the nearest 0.02, then find the mode
|
||||
const buckets = new Map<number, number>()
|
||||
for (const r of allRatios) {
|
||||
const key = Math.round(r * 50) / 50 // round to nearest 0.02
|
||||
buckets.set(key, (buckets.get(key) || 0) + 1)
|
||||
}
|
||||
let modeRatio = allRatios[0]
|
||||
let modeCount = 0
|
||||
for (const [ratio, count] of buckets) {
|
||||
if (count > modeCount) { modeRatio = ratio; modeCount = count }
|
||||
}
|
||||
// Apply mode to all word positions
|
||||
for (const wps of positions.values()) {
|
||||
for (const wp of wps) wp.fontRatio = modeRatio
|
||||
}
|
||||
}
|
||||
|
||||
setCellWordPositions(positions)
|
||||
}
|
||||
img.src = imageUrl
|
||||
}, [active, cells, imageUrl, rotation])
|
||||
|
||||
return cellWordPositions
|
||||
}
|
||||
@@ -234,6 +234,28 @@ export const MODULE_REGISTRY: BackendModule[] = [
|
||||
},
|
||||
priority: 'high'
|
||||
},
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
description: 'Vergleich verschiedener KI-Modelle und Provider',
|
||||
category: 'ai',
|
||||
backend: {
|
||||
service: 'python-backend',
|
||||
port: 8000,
|
||||
basePath: '/api/llm',
|
||||
endpoints: [
|
||||
{ path: '/providers', method: 'GET', description: 'Verfuegbare Provider' },
|
||||
{ path: '/compare', method: 'POST', description: 'Modelle vergleichen' },
|
||||
{ path: '/benchmark', method: 'POST', description: 'Benchmark ausfuehren' },
|
||||
]
|
||||
},
|
||||
frontend: {
|
||||
adminV2Page: '/ai/llm-compare',
|
||||
oldAdminPage: '/admin/llm-compare',
|
||||
status: 'connected'
|
||||
},
|
||||
priority: 'medium'
|
||||
},
|
||||
{
|
||||
id: 'magic-help',
|
||||
name: 'Magic Help (TrOCR)',
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* All DSGVO and Compliance modules are now consolidated under the SDK.
|
||||
*/
|
||||
|
||||
export type CategoryId = 'communication' | 'ai' | 'education' | 'website' | 'sdk-docs'
|
||||
export type CategoryId = 'compliance-sdk' | 'ai' | 'education' | 'website' | 'sdk-docs'
|
||||
|
||||
export interface NavModule {
|
||||
id: string
|
||||
@@ -31,47 +31,23 @@ export interface NavCategory {
|
||||
|
||||
export const navigation: NavCategory[] = [
|
||||
// =========================================================================
|
||||
// Kommunikation — Video, Voice, Alerts
|
||||
// Compliance SDK - Alle Datenschutz-, Compliance- und SDK-Module
|
||||
// =========================================================================
|
||||
{
|
||||
id: 'communication',
|
||||
name: 'Kommunikation',
|
||||
icon: 'mail',
|
||||
color: '#f59e0b', // Amber-500
|
||||
colorClass: 'communication',
|
||||
description: 'Video & Chat, Voice Service, E-Mail, Alerts',
|
||||
id: 'compliance-sdk',
|
||||
name: 'Compliance SDK',
|
||||
icon: 'shield',
|
||||
color: '#8b5cf6', // Violet-500
|
||||
colorClass: 'compliance-sdk',
|
||||
description: 'DSGVO, Audit, GRC & SDK-Werkzeuge',
|
||||
modules: [
|
||||
{
|
||||
id: 'mail',
|
||||
name: 'Unified Inbox',
|
||||
href: '/communication/mail',
|
||||
description: 'E-Mail-Konten & KI-Analyse',
|
||||
purpose: 'E-Mail-Konten verwalten und KI-Kategorisierung nutzen. IMAP/SMTP Konfiguration, Vorlagen und Audit-Log.',
|
||||
audience: ['Support', 'Admins'],
|
||||
},
|
||||
{
|
||||
id: 'video-chat',
|
||||
name: 'Video & Chat',
|
||||
href: '/communication/video-chat',
|
||||
description: 'Matrix & Jitsi Monitoring',
|
||||
purpose: 'Dashboard fuer Matrix Synapse und Jitsi Meet. Service-Status, aktive Meetings, Traffic-Analyse und Ressourcen-Empfehlungen.',
|
||||
audience: ['Admins', 'DevOps'],
|
||||
},
|
||||
{
|
||||
id: 'voice-service',
|
||||
name: 'Voice Service',
|
||||
href: '/communication/matrix',
|
||||
description: 'PersonaPlex-7B & TaskOrchestrator',
|
||||
purpose: 'Voice-First Interface Konfiguration und Architektur-Dokumentation. Live Demo, Task States, Intents und DSGVO-Informationen.',
|
||||
audience: ['Entwickler', 'Admins'],
|
||||
},
|
||||
{
|
||||
id: 'alerts',
|
||||
name: 'Alerts Monitoring',
|
||||
href: '/communication/alerts',
|
||||
description: 'Google Alerts & Feed-Ueberwachung',
|
||||
purpose: 'Google Alerts und RSS-Feeds fuer relevante Neuigkeiten ueberwachen. Topics, Regeln, Relevanz-Profil und Digest-Generierung.',
|
||||
audience: ['Marketing', 'Admins'],
|
||||
id: 'catalog-manager',
|
||||
name: 'Katalogverwaltung',
|
||||
href: '/dashboard/catalog-manager',
|
||||
description: 'SDK-Kataloge & Auswahltabellen',
|
||||
purpose: 'Zentrale Verwaltung aller Dropdown- und Auswahltabellen im SDK. Systemkataloge (Risiken, Massnahmen, Vorlagen) anzeigen und benutzerdefinierte Eintraege ergaenzen, bearbeiten und loeschen.',
|
||||
audience: ['DSB', 'Compliance Officer', 'Administratoren'],
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -132,6 +108,16 @@ export const navigation: NavCategory[] = [
|
||||
// -----------------------------------------------------------------------
|
||||
// KI-Werkzeuge: Standalone-Tools fuer Entwicklung & QA
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
href: '/ai/llm-compare',
|
||||
description: 'KI-Provider Vergleich',
|
||||
purpose: 'Vergleichen Sie verschiedene LLM-Anbieter (Ollama, OpenAI, Anthropic) hinsichtlich Qualitaet, Geschwindigkeit und Kosten. Standalone-Werkzeug fuer Modell-Evaluation.',
|
||||
audience: ['Entwickler', 'Data Scientists'],
|
||||
oldAdminPath: '/admin/llm-compare',
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-compare',
|
||||
name: 'OCR Vergleich',
|
||||
@@ -141,24 +127,6 @@ export const navigation: NavCategory[] = [
|
||||
audience: ['Entwickler', 'Data Scientists', 'Lehrer'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-pipeline',
|
||||
name: 'OCR Pipeline',
|
||||
href: '/ai/ocr-pipeline',
|
||||
description: 'Schrittweise Seitenrekonstruktion',
|
||||
purpose: 'Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. 6-Schritt-Pipeline mit Ground Truth Validierung.',
|
||||
audience: ['Entwickler', 'Data Scientists'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-overlay',
|
||||
name: 'OCR Overlay',
|
||||
href: '/ai/ocr-overlay',
|
||||
description: 'Ganzseitige Overlay-Rekonstruktion',
|
||||
purpose: 'Arbeitsblatt ohne Spaltenerkennung direkt als Overlay rekonstruieren. Vereinfachte 7-Schritt-Pipeline.',
|
||||
audience: ['Entwickler'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
|
||||
@@ -23,7 +23,7 @@ export const roles: Role[] = [
|
||||
name: 'Entwickler',
|
||||
description: 'Voller Zugriff auf alle Bereiche',
|
||||
icon: 'code',
|
||||
visibleCategories: ['communication', 'ai', 'education', 'website'],
|
||||
visibleCategories: ['compliance-sdk', 'ai', 'education', 'website'],
|
||||
color: 'bg-primary-100 border-primary-300 text-primary-700',
|
||||
},
|
||||
{
|
||||
@@ -31,7 +31,7 @@ export const roles: Role[] = [
|
||||
name: 'Manager',
|
||||
description: 'Executive Uebersicht',
|
||||
icon: 'chart',
|
||||
visibleCategories: ['communication', 'website'],
|
||||
visibleCategories: ['compliance-sdk', 'website'],
|
||||
color: 'bg-blue-100 border-blue-300 text-blue-700',
|
||||
},
|
||||
{
|
||||
@@ -39,7 +39,7 @@ export const roles: Role[] = [
|
||||
name: 'Auditor',
|
||||
description: 'Compliance Pruefung',
|
||||
icon: 'clipboard',
|
||||
visibleCategories: ['communication'],
|
||||
visibleCategories: ['compliance-sdk'],
|
||||
color: 'bg-amber-100 border-amber-300 text-amber-700',
|
||||
},
|
||||
{
|
||||
@@ -47,7 +47,7 @@ export const roles: Role[] = [
|
||||
name: 'DSB',
|
||||
description: 'Datenschutzbeauftragter',
|
||||
icon: 'shield',
|
||||
visibleCategories: ['communication'],
|
||||
visibleCategories: ['compliance-sdk'],
|
||||
color: 'bg-purple-100 border-purple-300 text-purple-700',
|
||||
},
|
||||
]
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
const nextConfig = {
|
||||
output: 'standalone',
|
||||
reactStrictMode: true,
|
||||
// Force unique build ID to bust browser caches on each deploy
|
||||
generateBuildId: () => `build-${Date.now()}`,
|
||||
// TODO: Remove after fixing type incompatibilities from restore
|
||||
typescript: {
|
||||
ignoreBuildErrors: true,
|
||||
|
||||
463
admin-lehrer/package-lock.json
generated
463
admin-lehrer/package-lock.json
generated
@@ -8,7 +8,6 @@
|
||||
"name": "breakpilot-admin-v2",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"bpmn-js": "^18.0.1",
|
||||
"jspdf": "^4.1.0",
|
||||
"jszip": "^3.10.1",
|
||||
"lucide-react": "^0.468.0",
|
||||
@@ -16,7 +15,6 @@
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"reactflow": "^11.11.4",
|
||||
"recharts": "^2.15.0",
|
||||
"uuid": "^13.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -430,16 +428,6 @@
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@bpmn-io/diagram-js-ui": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@bpmn-io/diagram-js-ui/-/diagram-js-ui-0.2.3.tgz",
|
||||
"integrity": "sha512-OGyjZKvGK8tHSZ0l7RfeKhilGoOGtFDcoqSGYkX0uhFlo99OVZ9Jn1K7TJGzcE9BdKwvA5Y5kGqHEhdTxHvFfw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"htm": "^3.1.1",
|
||||
"preact": "^10.11.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@csstools/color-helpers": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.1.0.tgz",
|
||||
@@ -3008,39 +2996,6 @@
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/bpmn-js": {
|
||||
"version": "18.12.0",
|
||||
"resolved": "https://registry.npmjs.org/bpmn-js/-/bpmn-js-18.12.0.tgz",
|
||||
"integrity": "sha512-Dg2O+r7jpBwLgWGpManc7P4ZfZQfxTVi2xNtXR3Q2G5Hx1RVYVFoNsQED8+FPCgjy6m7ZQbxKP1sjCJt5rbtBg==",
|
||||
"license": "SEE LICENSE IN LICENSE",
|
||||
"dependencies": {
|
||||
"bpmn-moddle": "^10.0.0",
|
||||
"diagram-js": "^15.9.0",
|
||||
"diagram-js-direct-editing": "^3.3.0",
|
||||
"ids": "^3.0.0",
|
||||
"inherits-browser": "^0.1.0",
|
||||
"min-dash": "^5.0.0",
|
||||
"min-dom": "^5.2.0",
|
||||
"tiny-svg": "^4.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/bpmn-moddle": {
|
||||
"version": "10.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bpmn-moddle/-/bpmn-moddle-10.0.0.tgz",
|
||||
"integrity": "sha512-vXePD5jkatcILmM3zwJG/m6IIHIghTGB7WvgcdEraEw8E8VdJHrTgrvBUhbzqaXJpnsGQz15QS936xeBY6l9aA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-dash": "^5.0.0",
|
||||
"moddle": "^8.0.0",
|
||||
"moddle-xml": "^12.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 20.12"
|
||||
}
|
||||
},
|
||||
"node_modules/braces": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
@@ -3198,15 +3153,6 @@
|
||||
"integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/clsx": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz",
|
||||
"integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/commander": {
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz",
|
||||
@@ -3316,20 +3262,9 @@
|
||||
"version": "3.2.3",
|
||||
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
|
||||
"integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==",
|
||||
"devOptional": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/d3-array": {
|
||||
"version": "3.2.4",
|
||||
"resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz",
|
||||
"integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"internmap": "1 - 2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-color": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz",
|
||||
@@ -3370,15 +3305,6 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-format": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz",
|
||||
"integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-interpolate": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz",
|
||||
@@ -3391,31 +3317,6 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-path": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz",
|
||||
"integrity": "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-scale": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz",
|
||||
"integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"d3-array": "2.10.0 - 3",
|
||||
"d3-format": "1 - 3",
|
||||
"d3-interpolate": "1.2.0 - 3",
|
||||
"d3-time": "2.1.1 - 3",
|
||||
"d3-time-format": "2 - 4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-selection": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
|
||||
@@ -3425,42 +3326,6 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-shape": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz",
|
||||
"integrity": "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"d3-path": "^3.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-time": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz",
|
||||
"integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"d3-array": "2 - 3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-time-format": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz",
|
||||
"integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"d3-time": "1 - 3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-timer": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz",
|
||||
@@ -3544,12 +3409,6 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/decimal.js-light": {
|
||||
"version": "2.5.1",
|
||||
"resolved": "https://registry.npmjs.org/decimal.js-light/-/decimal.js-light-2.5.1.tgz",
|
||||
"integrity": "sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/dequal": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
|
||||
@@ -3570,51 +3429,6 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/diagram-js": {
|
||||
"version": "15.9.1",
|
||||
"resolved": "https://registry.npmjs.org/diagram-js/-/diagram-js-15.9.1.tgz",
|
||||
"integrity": "sha512-2JsGmyeTo6o39beq2e/UkTfMopQSM27eXBUzbYQ+1m5VhEnQDkcjcrnRCjcObLMzzXSE/LSJyYhji90sqBFodQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@bpmn-io/diagram-js-ui": "^0.2.3",
|
||||
"clsx": "^2.1.1",
|
||||
"didi": "^11.0.0",
|
||||
"inherits-browser": "^0.1.0",
|
||||
"min-dash": "^5.0.0",
|
||||
"min-dom": "^5.2.0",
|
||||
"object-refs": "^0.4.0",
|
||||
"path-intersection": "^4.1.0",
|
||||
"tiny-svg": "^4.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/diagram-js-direct-editing": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmjs.org/diagram-js-direct-editing/-/diagram-js-direct-editing-3.3.0.tgz",
|
||||
"integrity": "sha512-EjXYb35J3qBU8lLz5U81hn7wNykVmF7U5DXZ7BvPok2IX7rmPz+ZyaI5AEMiqaC6lpSnHqPxFcPgKEiJcAiv5w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-dash": "^5.0.0",
|
||||
"min-dom": "^5.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"diagram-js": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/didi": {
|
||||
"version": "11.0.0",
|
||||
"resolved": "https://registry.npmjs.org/didi/-/didi-11.0.0.tgz",
|
||||
"integrity": "sha512-PzCfRzQttvFpVcYMbSF7h8EsWjeJpVjWH4qDhB5LkMi1ILvHq4Ob0vhM2wLFziPkbUBi+PAo7ODbe2sacR7nJQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 20.12"
|
||||
}
|
||||
},
|
||||
"node_modules/didyoumean": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
|
||||
@@ -3637,28 +3451,6 @@
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/dom-helpers": {
|
||||
"version": "5.2.1",
|
||||
"resolved": "https://registry.npmjs.org/dom-helpers/-/dom-helpers-5.2.1.tgz",
|
||||
"integrity": "sha512-nRCa7CK3VTrM2NmGkIy4cbK7IZlgBE/PYMn55rrXefr5xXDP0LdtfPnblFDoVdcAfslJ7or6iqAUnx0CCGIWQA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.8.7",
|
||||
"csstype": "^3.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/domify": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/domify/-/domify-3.0.0.tgz",
|
||||
"integrity": "sha512-bs2yO68JDFOm6rKv8f0EnrM2cENduhRkpqOtt/s5l5JBA/eqGBZCzLPmdYoHtJ6utgLGgcBajFsEQbl12pT0lQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/dompurify": {
|
||||
"version": "3.3.1",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.1.tgz",
|
||||
@@ -3758,12 +3550,6 @@
|
||||
"@types/estree": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eventemitter3": {
|
||||
"version": "4.0.7",
|
||||
"resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz",
|
||||
"integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/expect-type": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz",
|
||||
@@ -3774,15 +3560,6 @@
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fast-equals": {
|
||||
"version": "5.4.0",
|
||||
"resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-5.4.0.tgz",
|
||||
"integrity": "sha512-jt2DW/aNFNwke7AUd+Z+e6pz39KO5rzdbbFCg2sGafS4mk13MI7Z8O5z9cADNn5lhGODIgLwug6TZO2ctf7kcw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fast-glob": {
|
||||
"version": "3.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz",
|
||||
@@ -3928,12 +3705,6 @@
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/htm": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/htm/-/htm-3.1.1.tgz",
|
||||
"integrity": "sha512-983Vyg8NwUE7JkZ6NmOqpCZ+sh1bKv2iYTlUkzlWmA5JD2acKoxd4KVxbMmxX/85mtfdnDmTFoNKcg5DGAvxNQ==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/html-encoding-sniffer": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz",
|
||||
@@ -3989,15 +3760,6 @@
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/ids": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ids/-/ids-3.0.1.tgz",
|
||||
"integrity": "sha512-mr0zAgpgA/hzCrHB0DnoTG6xZjNC3ABs4eaksXrpVtfaDatA2SVdDb1ZPLjmKjqzp4kexQRuHXwDWQILVK8FZQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 20.12"
|
||||
}
|
||||
},
|
||||
"node_modules/immediate": {
|
||||
"version": "3.0.6",
|
||||
"resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
|
||||
@@ -4020,21 +3782,6 @@
|
||||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/inherits-browser": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/inherits-browser/-/inherits-browser-0.1.0.tgz",
|
||||
"integrity": "sha512-CJHHvW3jQ6q7lzsXPpapLdMx5hDpSF3FSh45pwsj6bKxJJ8Nl8v43i5yXnr3BdfOimGHKyniewQtnAIp3vyJJw==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/internmap": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz",
|
||||
"integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/iobuffer": {
|
||||
"version": "5.4.0",
|
||||
"resolved": "https://registry.npmjs.org/iobuffer/-/iobuffer-5.4.0.tgz",
|
||||
@@ -4262,12 +4009,6 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash": {
|
||||
"version": "4.17.23",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz",
|
||||
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/loose-envify": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
|
||||
@@ -4351,22 +4092,6 @@
|
||||
"node": ">=8.6"
|
||||
}
|
||||
},
|
||||
"node_modules/min-dash": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/min-dash/-/min-dash-5.0.0.tgz",
|
||||
"integrity": "sha512-EGuoBnVL7/Fnv2sqakpX5WGmZehZ3YMmLayT7sM8E9DRU74kkeyMg4Rik1lsOkR2GbFNeBca4/L+UfU6gF0Edw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/min-dom": {
|
||||
"version": "5.3.0",
|
||||
"resolved": "https://registry.npmjs.org/min-dom/-/min-dom-5.3.0.tgz",
|
||||
"integrity": "sha512-0w5FEBgPAyHhmFojW3zxd7we3D+m5XYS3E/06OyvxmbHJoiQVa4Nagj6RWvoAKYRw5xth6cP5TMePc5cR1M9hA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"domify": "^3.0.0",
|
||||
"min-dash": "^5.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/min-indent": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz",
|
||||
@@ -4377,31 +4102,6 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/moddle": {
|
||||
"version": "8.1.0",
|
||||
"resolved": "https://registry.npmjs.org/moddle/-/moddle-8.1.0.tgz",
|
||||
"integrity": "sha512-dBddc1CNuZHgro8nQWwfPZ2BkyLWdnxoNpPu9d+XKPN96DAiiBOeBw527ft++ebDuFez5PMdaR3pgUgoOaUGrA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-dash": "^5.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/moddle-xml": {
|
||||
"version": "12.0.0",
|
||||
"resolved": "https://registry.npmjs.org/moddle-xml/-/moddle-xml-12.0.0.tgz",
|
||||
"integrity": "sha512-NJc2+sCe4tvuGlaUBcoZcYf6j9f+z+qxHOyGm/LB3ZrlJXVPPHoBTg/KXgDRCufdBJhJ3AheFs3QU/abABNzRg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-dash": "^5.0.0",
|
||||
"saxen": "^11.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"moddle": ">= 6.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ms": {
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||
@@ -4540,6 +4240,7 @@
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||
"integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
@@ -4555,15 +4256,6 @@
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/object-refs": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/object-refs/-/object-refs-0.4.0.tgz",
|
||||
"integrity": "sha512-6kJqKWryKZmtte6QYvouas0/EIJKPI1/MMIuRsiBlNuhIMfqYTggzX2F1AJ2+cDs288xyi9GL7FyasHINR98BQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/obug": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz",
|
||||
@@ -4594,15 +4286,6 @@
|
||||
"url": "https://github.com/inikulin/parse5?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/path-intersection": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/path-intersection/-/path-intersection-4.1.0.tgz",
|
||||
"integrity": "sha512-urUP6WvhnxbHPdHYl6L7Yrc6+1ny6uOFKPCzPxTSUSYGHG0o94RmI7SvMMaScNAM5RtTf08bg4skc6/kjfne3A==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 14.20"
|
||||
}
|
||||
},
|
||||
"node_modules/path-parse": {
|
||||
"version": "1.0.7",
|
||||
"resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
|
||||
@@ -4872,16 +4555,6 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/preact": {
|
||||
"version": "10.28.4",
|
||||
"resolved": "https://registry.npmjs.org/preact/-/preact-10.28.4.tgz",
|
||||
"integrity": "sha512-uKFfOHWuSNpRFVTnljsCluEFq57OKT+0QdOiQo8XWnQ/pSvg7OpX5eNOejELXJMWy+BwM2nobz0FkvzmnpCNsQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/preact"
|
||||
}
|
||||
},
|
||||
"node_modules/pretty-format": {
|
||||
"version": "27.5.1",
|
||||
"resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz",
|
||||
@@ -4904,23 +4577,6 @@
|
||||
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/prop-types": {
|
||||
"version": "15.8.1",
|
||||
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
|
||||
"integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"loose-envify": "^1.4.0",
|
||||
"object-assign": "^4.1.1",
|
||||
"react-is": "^16.13.1"
|
||||
}
|
||||
},
|
||||
"node_modules/prop-types/node_modules/react-is": {
|
||||
"version": "16.13.1",
|
||||
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
|
||||
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/punycode": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
|
||||
@@ -5005,37 +4661,6 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-smooth": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-4.0.4.tgz",
|
||||
"integrity": "sha512-gnGKTpYwqL0Iii09gHobNolvX4Kiq4PKx6eWBCYYix+8cdw+cGo3do906l1NBPKkSWx1DghC1dlWG9L2uGd61Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fast-equals": "^5.0.1",
|
||||
"prop-types": "^15.8.1",
|
||||
"react-transition-group": "^4.4.5"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-transition-group": {
|
||||
"version": "4.4.5",
|
||||
"resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz",
|
||||
"integrity": "sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.5.5",
|
||||
"dom-helpers": "^5.0.1",
|
||||
"loose-envify": "^1.4.0",
|
||||
"prop-types": "^15.6.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=16.6.0",
|
||||
"react-dom": ">=16.6.0"
|
||||
}
|
||||
},
|
||||
"node_modules/reactflow": {
|
||||
"version": "11.11.4",
|
||||
"resolved": "https://registry.npmjs.org/reactflow/-/reactflow-11.11.4.tgz",
|
||||
@@ -5092,44 +4717,6 @@
|
||||
"node": ">=8.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/recharts": {
|
||||
"version": "2.15.4",
|
||||
"resolved": "https://registry.npmjs.org/recharts/-/recharts-2.15.4.tgz",
|
||||
"integrity": "sha512-UT/q6fwS3c1dHbXv2uFgYJ9BMFHu3fwnd7AYZaEQhXuYQ4hgsxLvsUXzGdKeZrW5xopzDCvuA2N41WJ88I7zIw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"clsx": "^2.0.0",
|
||||
"eventemitter3": "^4.0.1",
|
||||
"lodash": "^4.17.21",
|
||||
"react-is": "^18.3.1",
|
||||
"react-smooth": "^4.0.4",
|
||||
"recharts-scale": "^0.4.4",
|
||||
"tiny-invariant": "^1.3.1",
|
||||
"victory-vendor": "^36.6.8"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/recharts-scale": {
|
||||
"version": "0.4.5",
|
||||
"resolved": "https://registry.npmjs.org/recharts-scale/-/recharts-scale-0.4.5.tgz",
|
||||
"integrity": "sha512-kivNFO+0OcUNu7jQquLXAxz1FIwZj8nrj+YkOKc5694NbjCvcT6aSZiIzNzd2Kul4o4rTto8QVR9lMNtxD4G1w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"decimal.js-light": "^2.4.1"
|
||||
}
|
||||
},
|
||||
"node_modules/recharts/node_modules/react-is": {
|
||||
"version": "18.3.1",
|
||||
"resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz",
|
||||
"integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/redent": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz",
|
||||
@@ -5278,15 +4865,6 @@
|
||||
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/saxen": {
|
||||
"version": "11.0.2",
|
||||
"resolved": "https://registry.npmjs.org/saxen/-/saxen-11.0.2.tgz",
|
||||
"integrity": "sha512-WDb4gqac8uiJzOdOdVpr9NWh9NrJMm7Brn5GX2Poj+mjE/QTXqYQENr8T/mom54dDDgbd3QjwTg23TRHYiWXRA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 20.12"
|
||||
}
|
||||
},
|
||||
"node_modules/saxes": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz",
|
||||
@@ -5582,21 +5160,6 @@
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/tiny-invariant": {
|
||||
"version": "1.3.3",
|
||||
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
|
||||
"integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tiny-svg": {
|
||||
"version": "4.1.4",
|
||||
"resolved": "https://registry.npmjs.org/tiny-svg/-/tiny-svg-4.1.4.tgz",
|
||||
"integrity": "sha512-cBaEACCbouYrQc9RG+eTXnPYosX1Ijqty/I6DdXovwDd89Pwu4jcmpOR7BuFEF9YCcd7/AWwasE0207WMK7hdw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 20"
|
||||
}
|
||||
},
|
||||
"node_modules/tinybench": {
|
||||
"version": "2.9.0",
|
||||
"resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz",
|
||||
@@ -5844,28 +5407,6 @@
|
||||
"uuid": "dist-node/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/victory-vendor": {
|
||||
"version": "36.9.2",
|
||||
"resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.9.2.tgz",
|
||||
"integrity": "sha512-PnpQQMuxlwYdocC8fIJqVXvkeViHYzotI+NJrCuav0ZYFoq912ZHBk3mCeuj+5/VpodOjPe1z0Fk2ihgzlXqjQ==",
|
||||
"license": "MIT AND ISC",
|
||||
"dependencies": {
|
||||
"@types/d3-array": "^3.0.3",
|
||||
"@types/d3-ease": "^3.0.0",
|
||||
"@types/d3-interpolate": "^3.0.1",
|
||||
"@types/d3-scale": "^4.0.2",
|
||||
"@types/d3-shape": "^3.1.0",
|
||||
"@types/d3-time": "^3.0.0",
|
||||
"@types/d3-timer": "^3.0.0",
|
||||
"d3-array": "^3.1.6",
|
||||
"d3-ease": "^3.0.1",
|
||||
"d3-interpolate": "^3.0.1",
|
||||
"d3-scale": "^4.0.2",
|
||||
"d3-shape": "^3.1.0",
|
||||
"d3-time": "^3.0.0",
|
||||
"d3-timer": "^3.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "7.3.1",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz",
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
"test:all": "vitest run && playwright test --project=chromium"
|
||||
},
|
||||
"dependencies": {
|
||||
"bpmn-js": "^18.0.1",
|
||||
"jspdf": "^4.1.0",
|
||||
"jszip": "^3.10.1",
|
||||
"lucide-react": "^0.468.0",
|
||||
@@ -26,7 +27,6 @@
|
||||
"react-dom": "^18.3.1",
|
||||
"reactflow": "^11.11.4",
|
||||
"recharts": "^2.15.0",
|
||||
"fabric": "^6.0.0",
|
||||
"uuid": "^13.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -119,6 +119,13 @@ export const AI_PIPELINE_MODULES: AIModuleLink[] = [
|
||||
* Kein direkter Datenfluss zur Pipeline.
|
||||
*/
|
||||
export const AI_TOOLS_MODULES: AIModuleLink[] = [
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
href: '/ai/llm-compare',
|
||||
description: 'KI-Provider Vergleich & Evaluation',
|
||||
icon: '⚖️',
|
||||
},
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
@@ -205,7 +212,27 @@ export const AI_MODULE_RELATIONS: Record<string, AIModuleLink[]> = {
|
||||
},
|
||||
],
|
||||
// KI-Werkzeuge Relations (Standalone-Tools)
|
||||
'llm-compare': [
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
href: '/ai/test-quality',
|
||||
description: 'Golden Suite & Synthetic Tests',
|
||||
},
|
||||
{
|
||||
id: 'agents',
|
||||
name: 'Agent Management',
|
||||
href: '/ai/agents',
|
||||
description: 'Multi-Agent System',
|
||||
},
|
||||
],
|
||||
'test-quality': [
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
href: '/ai/llm-compare',
|
||||
description: 'KI-Provider vergleichen',
|
||||
},
|
||||
{
|
||||
id: 'klausur-korrektur',
|
||||
name: 'Klausur-Korrektur',
|
||||
|
||||
323
docker-compose.coolify.yml
Normal file
323
docker-compose.coolify.yml
Normal file
@@ -0,0 +1,323 @@
|
||||
# =========================================================
|
||||
# BreakPilot Lehrer — KI-Lehrerplattform (Coolify)
|
||||
# =========================================================
|
||||
# Requires: breakpilot-core must be running
|
||||
# Deployed via Coolify. SSL termination handled by Traefik.
|
||||
# External services (managed separately in Coolify):
|
||||
# - PostgreSQL, Qdrant, S3-compatible storage
|
||||
# =========================================================
|
||||
|
||||
networks:
|
||||
breakpilot-network:
|
||||
external: true
|
||||
name: breakpilot-network
|
||||
|
||||
volumes:
|
||||
klausur_uploads:
|
||||
eh_uploads:
|
||||
ocr_labeling:
|
||||
paddle_models:
|
||||
lehrer_backend_data:
|
||||
opensearch_data:
|
||||
|
||||
services:
|
||||
|
||||
# =========================================================
|
||||
# FRONTEND
|
||||
# =========================================================
|
||||
admin-lehrer:
|
||||
build:
|
||||
context: ./admin-lehrer
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
NEXT_PUBLIC_API_URL: ${NEXT_PUBLIC_API_URL:-https://api-lehrer.breakpilot.ai}
|
||||
NEXT_PUBLIC_OLD_ADMIN_URL: ${NEXT_PUBLIC_OLD_ADMIN_URL:-}
|
||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
||||
container_name: bp-lehrer-admin
|
||||
expose:
|
||||
- "3000"
|
||||
volumes:
|
||||
- lehrer_backend_data:/app/data
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
||||
OLLAMA_URL: ${OLLAMA_URL:-}
|
||||
depends_on:
|
||||
backend-lehrer:
|
||||
condition: service_started
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.admin-lehrer.rule=Host(`admin-lehrer.breakpilot.ai`)"
|
||||
- "traefik.http.routers.admin-lehrer.entrypoints=https"
|
||||
- "traefik.http.routers.admin-lehrer.tls=true"
|
||||
- "traefik.http.routers.admin-lehrer.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.admin-lehrer.loadbalancer.server.port=3000"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
studio-v2:
|
||||
build:
|
||||
context: ./studio-v2
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
||||
container_name: bp-lehrer-studio-v2
|
||||
expose:
|
||||
- "3001"
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
depends_on:
|
||||
- backend-lehrer
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.studio.rule=Host(`app.breakpilot.ai`)"
|
||||
- "traefik.http.routers.studio.entrypoints=https"
|
||||
- "traefik.http.routers.studio.tls=true"
|
||||
- "traefik.http.routers.studio.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.studio.loadbalancer.server.port=3001"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
website:
|
||||
build:
|
||||
context: ./website
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
NEXT_PUBLIC_BILLING_API_URL: ${NEXT_PUBLIC_BILLING_API_URL:-https://api-core.breakpilot.ai}
|
||||
NEXT_PUBLIC_APP_URL: ${NEXT_PUBLIC_APP_URL:-https://app.breakpilot.ai}
|
||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
||||
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY: ${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY:-}
|
||||
container_name: bp-lehrer-website
|
||||
expose:
|
||||
- "3000"
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
VAST_API_KEY: ${VAST_API_KEY:-}
|
||||
CONTROL_API_KEY: ${CONTROL_API_KEY:-}
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
||||
EDU_SEARCH_URL: ${EDU_SEARCH_URL:-}
|
||||
EDU_SEARCH_API_KEY: ${EDU_SEARCH_API_KEY:-}
|
||||
depends_on:
|
||||
- backend-lehrer
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.website.rule=Host(`www.breakpilot.ai`)"
|
||||
- "traefik.http.routers.website.entrypoints=https"
|
||||
- "traefik.http.routers.website.tls=true"
|
||||
- "traefik.http.routers.website.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.website.loadbalancer.server.port=3000"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# BACKEND
|
||||
# =========================================================
|
||||
backend-lehrer:
|
||||
build:
|
||||
context: ./backend-lehrer
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-backend
|
||||
user: "0:0"
|
||||
expose:
|
||||
- "8001"
|
||||
volumes:
|
||||
- lehrer_backend_data:/app/data
|
||||
environment:
|
||||
PORT: 8001
|
||||
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}?options=-csearch_path%3Dlehrer,core,public
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
ENVIRONMENT: production
|
||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
||||
TROCR_SERVICE_URL: ${TROCR_SERVICE_URL:-}
|
||||
CAMUNDA_URL: ${CAMUNDA_URL:-}
|
||||
VALKEY_URL: redis://bp-core-valkey:6379/0
|
||||
SESSION_TTL_HOURS: ${SESSION_TTL_HOURS:-24}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
DEBUG: "false"
|
||||
ALERTS_AGENT_ENABLED: ${ALERTS_AGENT_ENABLED:-false}
|
||||
VAST_API_KEY: ${VAST_API_KEY:-}
|
||||
VAST_INSTANCE_ID: ${VAST_INSTANCE_ID:-}
|
||||
CONTROL_API_KEY: ${CONTROL_API_KEY:-}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
||||
OLLAMA_ENABLED: ${OLLAMA_ENABLED:-false}
|
||||
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-}
|
||||
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-}
|
||||
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-}
|
||||
OLLAMA_TIMEOUT: ${OLLAMA_TIMEOUT:-120}
|
||||
GAME_USE_DATABASE: ${GAME_USE_DATABASE:-true}
|
||||
GAME_REQUIRE_AUTH: ${GAME_REQUIRE_AUTH:-true}
|
||||
GAME_REQUIRE_BILLING: ${GAME_REQUIRE_BILLING:-true}
|
||||
GAME_LLM_MODEL: ${GAME_LLM_MODEL:-}
|
||||
SMTP_HOST: ${SMTP_HOST}
|
||||
SMTP_PORT: ${SMTP_PORT:-587}
|
||||
SMTP_USERNAME: ${SMTP_USERNAME}
|
||||
SMTP_PASSWORD: ${SMTP_PASSWORD}
|
||||
SMTP_FROM_NAME: ${SMTP_FROM_NAME:-BreakPilot}
|
||||
SMTP_FROM_ADDR: ${SMTP_FROM_ADDR:-noreply@breakpilot.ai}
|
||||
RAG_SERVICE_URL: http://bp-core-rag-service:8097
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.backend-lehrer.rule=Host(`api-lehrer.breakpilot.ai`)"
|
||||
- "traefik.http.routers.backend-lehrer.entrypoints=https"
|
||||
- "traefik.http.routers.backend-lehrer.tls=true"
|
||||
- "traefik.http.routers.backend-lehrer.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.backend-lehrer.loadbalancer.server.port=8001"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# MICROSERVICES
|
||||
# =========================================================
|
||||
klausur-service:
|
||||
build:
|
||||
context: ./klausur-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-klausur-service
|
||||
expose:
|
||||
- "8086"
|
||||
volumes:
|
||||
- klausur_uploads:/app/uploads
|
||||
- eh_uploads:/app/eh-uploads
|
||||
- ocr_labeling:/app/ocr-labeling
|
||||
- paddle_models:/root/.paddlex
|
||||
environment:
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
SCHOOL_SERVICE_URL: http://school-service:8084
|
||||
ENVIRONMENT: production
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
|
||||
EMBEDDING_SERVICE_URL: http://bp-core-embedding-service:8087
|
||||
QDRANT_URL: ${QDRANT_URL}
|
||||
MINIO_ENDPOINT: ${S3_ENDPOINT}
|
||||
MINIO_ACCESS_KEY: ${S3_ACCESS_KEY}
|
||||
MINIO_SECRET_KEY: ${S3_SECRET_KEY}
|
||||
MINIO_BUCKET: ${S3_BUCKET:-breakpilot-rag}
|
||||
MINIO_SECURE: ${S3_SECURE:-true}
|
||||
PADDLEOCR_SERVICE_URL: ${PADDLEOCR_SERVICE_URL:-}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
||||
OLLAMA_ENABLED: ${OLLAMA_ENABLED:-false}
|
||||
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-}
|
||||
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-}
|
||||
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-}
|
||||
RAG_SERVICE_URL: http://bp-core-rag-service:8097
|
||||
depends_on:
|
||||
school-service:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:8086/health"]
|
||||
interval: 30s
|
||||
timeout: 30s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.klausur.rule=Host(`klausur.breakpilot.ai`)"
|
||||
- "traefik.http.routers.klausur.entrypoints=https"
|
||||
- "traefik.http.routers.klausur.tls=true"
|
||||
- "traefik.http.routers.klausur.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.klausur.loadbalancer.server.port=8086"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
school-service:
|
||||
build:
|
||||
context: ./school-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-school-service
|
||||
expose:
|
||||
- "8084"
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
PORT: 8084
|
||||
ENVIRONMENT: production
|
||||
ALLOWED_ORIGINS: "*"
|
||||
LLM_GATEWAY_URL: http://backend-lehrer:8001/llm
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# EDU SEARCH
|
||||
# =========================================================
|
||||
opensearch:
|
||||
image: opensearchproject/opensearch:2.11.1
|
||||
container_name: bp-lehrer-opensearch
|
||||
environment:
|
||||
- cluster.name=edu-search-cluster
|
||||
- node.name=opensearch-node1
|
||||
- discovery.type=single-node
|
||||
- bootstrap.memory_lock=true
|
||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
|
||||
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD:-Admin123!}
|
||||
- plugins.security.disabled=true
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
nofile:
|
||||
soft: 65536
|
||||
hard: 65536
|
||||
volumes:
|
||||
- opensearch_data:/usr/share/opensearch/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -s http://localhost:9200 >/dev/null || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 60s
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
edu-search-service:
|
||||
build:
|
||||
context: ./edu-search-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-edu-search
|
||||
expose:
|
||||
- "8088"
|
||||
environment:
|
||||
PORT: 8088
|
||||
OPENSEARCH_URL: http://opensearch:9200
|
||||
OPENSEARCH_USERNAME: admin
|
||||
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-Admin123!}
|
||||
INDEX_NAME: bp_documents_v1
|
||||
EDU_SEARCH_API_KEY: ${EDU_SEARCH_API_KEY:-}
|
||||
USER_AGENT: "BreakpilotEduCrawler/1.0 (+contact: security@breakpilot.com)"
|
||||
RATE_LIMIT_PER_SEC: "0.2"
|
||||
MAX_DEPTH: "4"
|
||||
MAX_PAGES_PER_RUN: "500"
|
||||
DB_HOST: ${POSTGRES_HOST}
|
||||
DB_PORT: ${POSTGRES_PORT:-5432}
|
||||
DB_USER: ${POSTGRES_USER}
|
||||
DB_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
DB_NAME: ${POSTGRES_DB}
|
||||
DB_SSLMODE: disable
|
||||
STAFF_CRAWLER_EMAIL: crawler@breakpilot.de
|
||||
depends_on:
|
||||
opensearch:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8088/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 3s
|
||||
start_period: 10s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
@@ -15,24 +15,11 @@ volumes:
|
||||
eh_uploads:
|
||||
ocr_labeling:
|
||||
paddle_models:
|
||||
lighton_models:
|
||||
paddleocr_models:
|
||||
transcription_models:
|
||||
transcription_temp:
|
||||
lehrer_backend_data:
|
||||
opensearch_data:
|
||||
# Communication (Jitsi + Matrix)
|
||||
synapse_data:
|
||||
synapse_db_data:
|
||||
jitsi_web_config:
|
||||
jitsi_web_crontabs:
|
||||
jitsi_transcripts:
|
||||
jitsi_prosody_config:
|
||||
jitsi_prosody_plugins:
|
||||
jitsi_jicofo_config:
|
||||
jitsi_jvb_config:
|
||||
# Voice
|
||||
voice_session_data:
|
||||
|
||||
services:
|
||||
|
||||
@@ -167,6 +154,7 @@ services:
|
||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
||||
TROCR_SERVICE_URL: http://paddleocr-service:8095
|
||||
CAMUNDA_URL: http://bp-core-camunda:8080
|
||||
VALKEY_URL: redis://bp-core-valkey:6379/0
|
||||
SESSION_TTL_HOURS: ${SESSION_TTL_HOURS:-24}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
@@ -221,7 +209,6 @@ services:
|
||||
- eh_uploads:/app/eh-uploads
|
||||
- ocr_labeling:/app/ocr-labeling
|
||||
- paddle_models:/root/.paddlex
|
||||
- lighton_models:/root/.cache/huggingface
|
||||
environment:
|
||||
JWT_SECRET: ${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
@@ -236,8 +223,6 @@ services:
|
||||
MINIO_BUCKET: ${MINIO_BUCKET:-breakpilot-rag}
|
||||
MINIO_SECURE: "false"
|
||||
PADDLEOCR_SERVICE_URL: http://paddleocr-service:8095
|
||||
PADDLEOCR_REMOTE_URL: ${PADDLEOCR_REMOTE_URL:-https://hetzner.meghsakha.com:8095}
|
||||
PADDLEOCR_API_KEY: ${PADDLEOCR_API_KEY:-}
|
||||
VAULT_ADDR: http://bp-core-vault:8200
|
||||
VAULT_TOKEN: ${VAULT_TOKEN:-breakpilot-dev-token}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
@@ -246,12 +231,6 @@ services:
|
||||
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-llama3.2}
|
||||
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-llama3.2-vision}
|
||||
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-llama3.2}
|
||||
OLLAMA_REVIEW_MODEL: ${OLLAMA_REVIEW_MODEL:-qwen3:0.6b}
|
||||
OLLAMA_REVIEW_BATCH_SIZE: ${OLLAMA_REVIEW_BATCH_SIZE:-20}
|
||||
REVIEW_ENGINE: ${REVIEW_ENGINE:-spell}
|
||||
OCR_ENGINE: ${OCR_ENGINE:-auto}
|
||||
OLLAMA_HTR_MODEL: ${OLLAMA_HTR_MODEL:-qwen2.5vl:32b}
|
||||
HTR_FALLBACK_MODEL: ${HTR_FALLBACK_MODEL:-trocr-large}
|
||||
RAG_SERVICE_URL: http://bp-core-rag-service:8097
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
@@ -394,216 +373,6 @@ services:
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# VOICE SERVICE
|
||||
# =========================================================
|
||||
voice-service:
|
||||
build:
|
||||
context: ./voice-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-voice-service
|
||||
platform: linux/arm64
|
||||
expose:
|
||||
- "8091"
|
||||
volumes:
|
||||
- voice_session_data:/app/data/sessions
|
||||
environment:
|
||||
PORT: 8091
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-breakpilot}:${POSTGRES_PASSWORD:-breakpilot123}@bp-core-postgres:5432/${POSTGRES_DB:-breakpilot_db}
|
||||
VALKEY_URL: redis://bp-core-valkey:6379/0
|
||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
|
||||
OLLAMA_VOICE_MODEL: ${OLLAMA_VOICE_MODEL:-llama3.2}
|
||||
ENVIRONMENT: ${ENVIRONMENT:-development}
|
||||
JWT_SECRET: ${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
depends_on:
|
||||
core-health-check:
|
||||
condition: service_completed_successfully
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:8091/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
start_period: 60s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# COMMUNICATION: Jitsi Meet
|
||||
# =========================================================
|
||||
jitsi-web:
|
||||
image: jitsi/web:stable-9823
|
||||
container_name: bp-lehrer-jitsi-web
|
||||
expose:
|
||||
- "80"
|
||||
volumes:
|
||||
- jitsi_web_config:/config
|
||||
- jitsi_web_crontabs:/var/spool/cron/crontabs
|
||||
- jitsi_transcripts:/usr/share/jitsi-meet/transcripts
|
||||
environment:
|
||||
ENABLE_XMPP_WEBSOCKET: "true"
|
||||
ENABLE_COLIBRI_WEBSOCKET: "true"
|
||||
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
|
||||
XMPP_BOSH_URL_BASE: http://jitsi-xmpp:5280
|
||||
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
|
||||
XMPP_GUEST_DOMAIN: ${XMPP_GUEST_DOMAIN:-guest.meet.jitsi}
|
||||
TZ: ${TZ:-Europe/Berlin}
|
||||
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
|
||||
JICOFO_AUTH_USER: focus
|
||||
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
|
||||
ENABLE_GUESTS: "true"
|
||||
ENABLE_RECORDING: "true"
|
||||
ENABLE_LIVESTREAMING: "false"
|
||||
DISABLE_HTTPS: "true"
|
||||
APP_NAME: "BreakPilot Meet"
|
||||
NATIVE_APP_NAME: "BreakPilot Meet"
|
||||
PROVIDER_NAME: "BreakPilot"
|
||||
depends_on:
|
||||
- jitsi-xmpp
|
||||
networks:
|
||||
breakpilot-network:
|
||||
aliases:
|
||||
- meet.jitsi
|
||||
|
||||
jitsi-xmpp:
|
||||
image: jitsi/prosody:stable-9823
|
||||
container_name: bp-lehrer-jitsi-xmpp
|
||||
volumes:
|
||||
- jitsi_prosody_config:/config
|
||||
- jitsi_prosody_plugins:/prosody-plugins-custom
|
||||
environment:
|
||||
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
|
||||
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
|
||||
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
|
||||
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
|
||||
XMPP_GUEST_DOMAIN: ${XMPP_GUEST_DOMAIN:-guest.meet.jitsi}
|
||||
XMPP_RECORDER_DOMAIN: ${XMPP_RECORDER_DOMAIN:-recorder.meet.jitsi}
|
||||
XMPP_CROSS_DOMAIN: "true"
|
||||
TZ: ${TZ:-Europe/Berlin}
|
||||
JICOFO_AUTH_USER: focus
|
||||
JICOFO_AUTH_PASSWORD: ${JICOFO_AUTH_PASSWORD:-jicofo_secret}
|
||||
JVB_AUTH_USER: jvb
|
||||
JVB_AUTH_PASSWORD: ${JVB_AUTH_PASSWORD:-jvb_secret}
|
||||
JIBRI_XMPP_USER: jibri
|
||||
JIBRI_XMPP_PASSWORD: ${JIBRI_XMPP_PASSWORD:-jibri_secret}
|
||||
JIBRI_RECORDER_USER: recorder
|
||||
JIBRI_RECORDER_PASSWORD: ${JIBRI_RECORDER_PASSWORD:-recorder_secret}
|
||||
LOG_LEVEL: ${XMPP_LOG_LEVEL:-warn}
|
||||
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
|
||||
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
|
||||
ENABLE_GUESTS: "true"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
breakpilot-network:
|
||||
aliases:
|
||||
- xmpp.meet.jitsi
|
||||
|
||||
jitsi-jicofo:
|
||||
image: jitsi/jicofo:stable-9823
|
||||
container_name: bp-lehrer-jitsi-jicofo
|
||||
volumes:
|
||||
- jitsi_jicofo_config:/config
|
||||
environment:
|
||||
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
|
||||
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
|
||||
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
|
||||
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
|
||||
XMPP_SERVER: jitsi-xmpp
|
||||
JICOFO_AUTH_USER: focus
|
||||
JICOFO_AUTH_PASSWORD: ${JICOFO_AUTH_PASSWORD:-jicofo_secret}
|
||||
TZ: ${TZ:-Europe/Berlin}
|
||||
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
|
||||
AUTH_TYPE: internal
|
||||
ENABLE_AUTO_OWNER: "true"
|
||||
depends_on:
|
||||
- jitsi-xmpp
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
jitsi-jvb:
|
||||
image: jitsi/jvb:stable-9823
|
||||
container_name: bp-lehrer-jitsi-jvb
|
||||
ports:
|
||||
- "10000:10000/udp"
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- jitsi_jvb_config:/config
|
||||
environment:
|
||||
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
|
||||
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
|
||||
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
|
||||
XMPP_SERVER: jitsi-xmpp
|
||||
JVB_AUTH_USER: jvb
|
||||
JVB_AUTH_PASSWORD: ${JVB_AUTH_PASSWORD:-jvb_secret}
|
||||
JVB_PORT: 10000
|
||||
JVB_STUN_SERVERS: ${JVB_STUN_SERVERS:-stun.l.google.com:19302}
|
||||
TZ: ${TZ:-Europe/Berlin}
|
||||
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
|
||||
COLIBRI_REST_ENABLED: "true"
|
||||
ENABLE_COLIBRI_WEBSOCKET: "true"
|
||||
depends_on:
|
||||
- jitsi-xmpp
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# COMMUNICATION: Matrix/Synapse
|
||||
# =========================================================
|
||||
synapse-db:
|
||||
image: postgres:16-alpine
|
||||
container_name: bp-lehrer-synapse-db
|
||||
profiles: [chat]
|
||||
environment:
|
||||
POSTGRES_USER: synapse
|
||||
POSTGRES_PASSWORD: ${SYNAPSE_DB_PASSWORD:-synapse_secret}
|
||||
POSTGRES_DB: synapse
|
||||
POSTGRES_INITDB_ARGS: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
|
||||
volumes:
|
||||
- synapse_db_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U synapse"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
synapse:
|
||||
image: matrixdotorg/synapse:latest
|
||||
container_name: bp-lehrer-synapse
|
||||
profiles: [chat]
|
||||
ports:
|
||||
- "8008:8008"
|
||||
- "8448:8448"
|
||||
volumes:
|
||||
- synapse_data:/data
|
||||
environment:
|
||||
SYNAPSE_SERVER_NAME: ${SYNAPSE_SERVER_NAME:-macmini}
|
||||
SYNAPSE_REPORT_STATS: "no"
|
||||
SYNAPSE_NO_TLS: "true"
|
||||
SYNAPSE_ENABLE_REGISTRATION: ${SYNAPSE_ENABLE_REGISTRATION:-true}
|
||||
SYNAPSE_LOG_LEVEL: ${SYNAPSE_LOG_LEVEL:-WARNING}
|
||||
UID: "1000"
|
||||
GID: "1000"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:8008/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
start_period: 30s
|
||||
retries: 3
|
||||
depends_on:
|
||||
synapse-db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# EDU SEARCH
|
||||
# =========================================================
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
# Chunk-Browser
|
||||
|
||||
## Uebersicht
|
||||
|
||||
Der Chunk-Browser ermoeglicht das sequenzielle Durchblaettern aller Chunks in einer Qdrant-Collection. Er ist als Tab "Chunk-Browser" auf der RAG-Seite (`/ai/rag`) verfuegbar.
|
||||
|
||||
**URL:** `https://macmini:3002/ai/rag` → Tab "Chunk-Browser"
|
||||
|
||||
---
|
||||
|
||||
## Funktionen
|
||||
|
||||
### Collection-Auswahl
|
||||
Dropdown mit allen verfuegbaren Compliance-Collections:
|
||||
|
||||
- `bp_compliance_gesetze`
|
||||
- `bp_compliance_ce`
|
||||
- `bp_compliance_datenschutz`
|
||||
- `bp_dsfa_corpus`
|
||||
- `bp_compliance_recht`
|
||||
- `bp_legal_templates`
|
||||
- `bp_compliance_gdpr`
|
||||
- `bp_compliance_schulrecht`
|
||||
- `bp_dsfa_templates`
|
||||
- `bp_dsfa_risks`
|
||||
|
||||
### Seitenweise Navigation
|
||||
- 20 Chunks pro Seite
|
||||
- Zurueck/Weiter-Buttons
|
||||
- Seitennummer und Chunk-Zaehler
|
||||
- Cursor-basierte Pagination via Qdrant Scroll API
|
||||
|
||||
### Textsuche
|
||||
- Filtert Chunks auf der aktuell geladenen Seite
|
||||
- Treffer werden gelb hervorgehoben
|
||||
- Suche ueber den Chunk-Text (payload.text, payload.content, payload.chunk_text)
|
||||
|
||||
### Chunk-Details
|
||||
- Klick auf einen Chunk klappt alle Metadaten aus
|
||||
- Zeigt: regulation_code, article, language, source, licence, etc.
|
||||
- Chunks haben eine fortlaufende Nummer (#1, #2, ...)
|
||||
|
||||
### Integration mit Regulierungen-Tab
|
||||
Der Button "In Chunks suchen" bei jeder Regulierung wechselt zum Chunk-Browser mit:
|
||||
- Vorauswahl der richtigen Collection
|
||||
- Vorausgefuelltem Suchbegriff (Regulierungsname)
|
||||
|
||||
---
|
||||
|
||||
## API
|
||||
|
||||
### Scroll-Endpoint (API Proxy)
|
||||
|
||||
```
|
||||
GET /api/legal-corpus?action=scroll&collection=bp_compliance_ce&limit=20&offset={cursor}
|
||||
```
|
||||
|
||||
**Parameter:**
|
||||
|
||||
| Parameter | Typ | Beschreibung |
|
||||
|-----------|-----|--------------|
|
||||
| `collection` | string | Qdrant Collection Name |
|
||||
| `limit` | number | Chunks pro Seite (max 100) |
|
||||
| `offset` | string | Cursor fuer naechste Seite (optional) |
|
||||
| `text_search` | string | Textsuche-Filter (optional) |
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"chunks": [
|
||||
{
|
||||
"id": "uuid",
|
||||
"text": "...",
|
||||
"regulation_code": "GDPR",
|
||||
"article": "Art. 5",
|
||||
"language": "de"
|
||||
}
|
||||
],
|
||||
"next_offset": "uuid-or-null",
|
||||
"total_in_page": 20
|
||||
}
|
||||
```
|
||||
|
||||
### Collection-Count-Endpoint
|
||||
|
||||
```
|
||||
GET /api/legal-corpus?action=collection-count&collection=bp_compliance_ce
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"count": 12345
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Technische Details
|
||||
|
||||
- Der API-Proxy spricht direkt mit Qdrant (Port 6333) via dessen `POST /collections/{name}/points/scroll` Endpoint
|
||||
- Kein Embedding oder rag-service erforderlich
|
||||
- Textsuche ist client-seitig (kein Embedding noetig)
|
||||
- Pagination ist cursor-basiert (Qdrant `next_page_offset`)
|
||||
|
||||
---
|
||||
|
||||
## Weitere Features auf der RAG-Seite
|
||||
|
||||
### Originalquelle-Links
|
||||
Jede Regulierung in der Tabelle hat einen "Originalquelle" Link zum offiziellen Dokument (EUR-Lex, gesetze-im-internet.de, etc.). Definiert in `REGULATION_SOURCES` (88 Eintraege).
|
||||
|
||||
### Low-Chunk-Warnung
|
||||
Regulierungen mit weniger als 10 Chunks aber einem erwarteten Wert >= 10 werden mit einem Amber-Warnsymbol markiert. Dies hilft, fehlgeschlagene oder unvollstaendige Ingestions zu erkennen.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -8,15 +8,24 @@ RUN npm install
|
||||
COPY frontend/ ./
|
||||
RUN npm run build
|
||||
|
||||
# Production stage — uses pre-built base with Tesseract + Python deps.
|
||||
# Base image contains: python:3.11-slim + tesseract-ocr + all pip packages.
|
||||
# Rebuild base only when requirements.txt or system deps change:
|
||||
# docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
|
||||
FROM klausur-base:latest
|
||||
# Production stage
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy backend code (this is the only layer that changes on code edits)
|
||||
# Install system dependencies (incl. Tesseract OCR for bounding-box extraction)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-deu \
|
||||
tesseract-ocr-eng \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies
|
||||
COPY backend/requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy backend code
|
||||
COPY backend/ ./
|
||||
|
||||
# Copy built frontend to the expected path
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
# Base image with system dependencies + Python packages.
|
||||
# These change rarely — build once, reuse on every --no-cache.
|
||||
#
|
||||
# Rebuild manually when requirements.txt or system deps change:
|
||||
# docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
|
||||
#
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# System dependencies (Tesseract OCR, curl for healthcheck)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-deu \
|
||||
tesseract-ocr-eng \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
fonts-liberation \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Python dependencies
|
||||
COPY backend/requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Clean up pip cache
|
||||
RUN rm -rf /root/.cache/pip
|
||||
@@ -1,471 +0,0 @@
|
||||
"""
|
||||
Embedded box detection and page zone splitting for the CV vocabulary pipeline.
|
||||
|
||||
Detects boxes (grammar tips, exercises, etc.) that span the page width and
|
||||
interrupt the normal column layout. Splits the page into vertical zones so
|
||||
that column detection can run independently per zone.
|
||||
|
||||
Two-stage algorithm (both run, results merged):
|
||||
1. Morphological line detection — finds bordered boxes via horizontal lines.
|
||||
2. Background shading detection — finds shaded/colored boxes via median-blur
|
||||
background analysis. Works for colored (blue, green) and grayscale
|
||||
(gray shading on B/W scans) boxes.
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import DetectedBox, PageZone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = [
|
||||
"detect_boxes",
|
||||
"split_page_into_zones",
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stage 1: Morphological line detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_boxes_by_lines(
    gray: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Locate bordered boxes by pairing long horizontal rules.

    Rows that contain a long horizontal stroke are grouped into line
    segments; each segment is then paired with the next unused segment
    below it whose distance yields a plausible box height.

    Args:
        gray: Grayscale page image.
        content_x, content_w: Horizontal content bounds; they define the
            column range that is scanned and the x/width of every result.
        content_y: Accepted for signature symmetry; not read here.
        content_h: Content height; caps the box height at 70% of it.

    Returns:
        One DetectedBox (confidence 0.8) per paired top/bottom rule, in
        top-to-bottom order of the top rule.
    """
    page_h, _ = gray.shape[:2]

    # Inverse threshold: pixels darker than 180 become foreground (255).
    _, ink = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)

    # Opening with a 1px-tall kernel that is half the content width wide
    # (never narrower than 50px) keeps only long horizontal strokes.
    rule_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (max(50, content_w // 2), 1)
    )
    rules = cv2.morphologyEx(ink, cv2.MORPH_OPEN, rule_kernel)

    # A row counts as part of a rule when at least 30% of the content
    # width is covered by surviving stroke pixels.
    row_hits = np.sum(rules[:, content_x:content_x + content_w] > 0, axis=1)
    is_rule_row = (row_hits >= content_w * 0.30)[:page_h]

    # Collapse runs of consecutive rule rows into (y_start, y_end) spans
    # with y_end exclusive: +1 steps mark run starts, -1 steps run ends.
    flags = np.concatenate(([0], is_rule_row.astype(np.int8), [0]))
    steps = np.diff(flags)
    segments: List[Tuple[int, int]] = [
        (int(start), int(stop))
        for start, stop in zip(np.flatnonzero(steps == 1),
                               np.flatnonzero(steps == -1))
    ]

    if len(segments) < 2:
        return []

    # Accepted box height: 30px up to 70% of the content height.
    min_box_h = 30
    max_box_h = int(content_h * 0.70)

    found: List[DetectedBox] = []
    paired = set()
    for top_idx, (top_start, top_end) in enumerate(segments):
        if top_idx in paired:
            continue
        for bot_idx in range(top_idx + 1, len(segments)):
            if bot_idx in paired:
                continue
            bot_start, bot_end = segments[bot_idx]
            span = bot_end - top_start
            if not (min_box_h <= span <= max_box_h):
                continue

            found.append(DetectedBox(
                x=content_x,
                y=top_start,
                width=content_w,
                height=span,
                confidence=0.8,
                # Border thickness is estimated from the thicker rule.
                border_thickness=max(top_end - top_start, bot_end - bot_start),
            ))
            paired.update((top_idx, bot_idx))
            break  # this top rule is consumed; continue with the next one

    return found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stage 2: Background shading detection (color + grayscale)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_boxes_by_shading(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Locate borderless boxes that stand out by a shaded or tinted background.

    A large median blur wipes out text strokes so only the background
    remains. Regions that are darker than the page background or carry
    visible saturation are extracted as contours and filtered by size,
    rectangularity and actual background difference.

    Args:
        img_bgr: BGR page image.
        content_x, content_y: Accepted for signature symmetry; not read here.
        content_w: Content width; drives the minimum area and minimum width.
        content_h: Content height; caps the box height at 70% of it.

    Returns:
        One DetectedBox per accepted region, with confidence 0.7 for
        tinted regions and 0.6 for gray-only shading; border_thickness is 0.
    """
    page_h, page_w = img_bgr.shape[:2]

    # 31px median blur: large enough to erase text, keeps the background.
    smooth = cv2.medianBlur(img_bgr, 31)
    smooth_gray = cv2.cvtColor(smooth, cv2.COLOR_BGR2GRAY)
    smooth_hsv = cv2.cvtColor(smooth, cv2.COLOR_BGR2HSV)

    # Page background brightness = median of the two top corner patches.
    patch = max(20, min(page_h // 10, page_w // 10))
    corner_samples = np.concatenate([
        smooth_gray[:patch, :patch].ravel(),
        smooth_gray[:patch, -patch:].ravel(),
    ])
    page_bg = float(np.median(corner_samples))

    # Candidate pixels: clearly darker than the page, or visibly saturated.
    darker = (smooth_gray < max(page_bg - 30, 150)).astype(np.uint8) * 255
    tinted = (smooth_hsv[:, :, 1] > 20).astype(np.uint8) * 255
    mask = cv2.bitwise_or(darker, tinted)

    # Close small gaps first, then open to drop isolated specks.
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (25, 10)),
    )
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_OPEN,
        cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5)),
    )

    outlines, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    min_area = content_w * 30        # equivalent of 30px tall at full width
    min_w = content_w * 0.25         # boxes may be as narrow as 25% of content
    min_h = 25
    max_h = int(content_h * 0.70)

    shaded: List[DetectedBox] = []
    for outline in outlines:
        filled = cv2.contourArea(outline)
        if filled < min_area:
            continue

        bx, by, bw, bh = cv2.boundingRect(outline)
        if bw < min_w:
            continue
        if not (min_h <= bh <= max_h):
            continue

        # Rectangularity: the contour must fill at least half of its
        # bounding rectangle.
        bbox_area = bw * bh
        if bbox_area > 0 and filled / bbox_area < 0.5:
            continue

        inner_gray = smooth_gray[by:by + bh, bx:bx + bw]
        inner_hsv = smooth_hsv[by:by + bh, bx:bx + bw]
        if inner_gray.size == 0:
            continue

        # Re-check on the region itself that the background really differs.
        darker_than_page = float(np.median(inner_gray)) < (page_bg - 15)
        has_color = float(np.median(inner_hsv[:, :, 1])) > 15
        if not (darker_than_page or has_color):
            continue

        shaded.append(DetectedBox(
            x=bx,
            y=by,
            width=bw,
            height=bh,
            confidence=0.7 if has_color else 0.6,
            border_thickness=0,
        ))

    return shaded
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _validate_box(
|
||||
box: DetectedBox,
|
||||
gray: np.ndarray,
|
||||
content_w: int,
|
||||
content_h: int,
|
||||
median_row_gap: int,
|
||||
) -> bool:
|
||||
"""Validate that a detected box is genuine (not a table-row separator etc.)."""
|
||||
# Must span > 25% of content width (lowered from 60% to allow smaller boxes)
|
||||
if box.width < content_w * 0.25:
|
||||
return False
|
||||
|
||||
# Height constraints
|
||||
if box.height < 25 or box.height > content_h * 0.70:
|
||||
return False
|
||||
|
||||
# Must not be confused with a table-row separator:
|
||||
# real boxes are at least 3x the median row gap
|
||||
if median_row_gap > 0 and box.height < median_row_gap * 3:
|
||||
return False
|
||||
|
||||
# Must contain some text (ink density check)
|
||||
h, w = gray.shape[:2]
|
||||
y1 = max(0, box.y)
|
||||
y2 = min(h, box.y + box.height)
|
||||
x1 = max(0, box.x)
|
||||
x2 = min(w, box.x + box.width)
|
||||
roi = gray[y1:y2, x1:x2]
|
||||
if roi.size == 0:
|
||||
return False
|
||||
ink_ratio = np.sum(roi < 128) / roi.size
|
||||
if ink_ratio < 0.002: # nearly empty → not a real content box
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API: detect_boxes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _merge_overlapping_boxes(boxes: List[DetectedBox]) -> List[DetectedBox]:
|
||||
"""Merge boxes that overlap significantly (IoU > 0.3 or one contains the other).
|
||||
|
||||
When two boxes overlap, keep the one with higher confidence (or the larger
|
||||
one if confidences are equal).
|
||||
"""
|
||||
if len(boxes) <= 1:
|
||||
return boxes
|
||||
|
||||
# Sort by area descending so larger boxes are processed first
|
||||
boxes = sorted(boxes, key=lambda b: b.width * b.height, reverse=True)
|
||||
keep = [True] * len(boxes)
|
||||
|
||||
for i in range(len(boxes)):
|
||||
if not keep[i]:
|
||||
continue
|
||||
bi = boxes[i]
|
||||
for j in range(i + 1, len(boxes)):
|
||||
if not keep[j]:
|
||||
continue
|
||||
bj = boxes[j]
|
||||
|
||||
# Compute overlap
|
||||
x1 = max(bi.x, bj.x)
|
||||
y1 = max(bi.y, bj.y)
|
||||
x2 = min(bi.x + bi.width, bj.x + bj.width)
|
||||
y2 = min(bi.y + bi.height, bj.y + bj.height)
|
||||
|
||||
if x2 <= x1 or y2 <= y1:
|
||||
continue # no overlap
|
||||
|
||||
inter = (x2 - x1) * (y2 - y1)
|
||||
area_i = bi.width * bi.height
|
||||
area_j = bj.width * bj.height
|
||||
smaller_area = min(area_i, area_j)
|
||||
|
||||
# If overlap covers > 50% of the smaller box, merge (drop the weaker)
|
||||
if smaller_area > 0 and inter / smaller_area > 0.50:
|
||||
# Keep the one with higher confidence; if equal, keep larger
|
||||
if bj.confidence > bi.confidence:
|
||||
keep[i] = False
|
||||
break
|
||||
else:
|
||||
keep[j] = False
|
||||
|
||||
return [b for b, k in zip(boxes, keep) if k]
|
||||
|
||||
|
||||
def detect_boxes(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
    median_row_gap: int = 0,
) -> List[DetectedBox]:
    """Find embedded boxes on a page using both detection stages.

    Candidates from the line-based and the shading-based detector are
    pooled, deduplicated, validated and returned top to bottom.

    Args:
        img_bgr: BGR page image (converted to grayscale internally).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds.
        median_row_gap: Median table-row gap passed on to validation;
            0 disables the table-separator check.

    Returns:
        Validated DetectedBox instances sorted by their y position.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    bounds = (content_x, content_w, content_y, content_h)

    # Stage 1 works on the grayscale image, stage 2 needs the color image.
    line_boxes = _detect_boxes_by_lines(gray, *bounds)
    shade_boxes = _detect_boxes_by_shading(img_bgr, *bounds)

    logger.debug("BoxDetect: %d line-based, %d shading-based candidates",
                 len(line_boxes), len(shade_boxes))

    merged = _merge_overlapping_boxes(line_boxes + shade_boxes)

    validated = sorted(
        (
            candidate for candidate in merged
            if _validate_box(candidate, gray, content_w, content_h, median_row_gap)
        ),
        key=lambda candidate: candidate.y,
    )

    if not validated:
        logger.debug("BoxDetect: no boxes detected")
    else:
        logger.info("BoxDetect: %d box(es) detected (line=%d, shade=%d, merged=%d)",
                    len(validated), len(line_boxes), len(shade_boxes), len(merged))

    return validated
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Zone Splitting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def split_page_into_zones(
    content_x: int,
    content_y: int,
    content_w: int,
    content_h: int,
    boxes: List[DetectedBox],
    min_zone_height: int = 40,
) -> List[PageZone]:
    """Split the content area into vertical zones around detected boxes.

    Every box becomes a 'box' zone. The space above the first box,
    between boxes and below the last box becomes a 'content' zone when
    it is at least ``min_zone_height`` tall.

    Args:
        content_x, content_y, content_w, content_h: Content area bounds.
        boxes: Detected boxes. They are processed in ascending y order;
            unsorted input is sorted here (stable, so already-sorted
            input keeps its order).
        min_zone_height: Minimum height for a content zone to be kept.

    Returns:
        List of PageZone in processing order with consecutive indices.
    """
    if not boxes:
        # No boxes: the whole content area is a single content zone.
        return [PageZone(
            index=0,
            zone_type='content',
            y=content_y,
            height=content_h,
            x=content_x,
            width=content_w,
        )]

    zones: List[PageZone] = []
    cursor_y = content_y
    content_bottom = content_y + content_h

    for box in sorted(boxes, key=lambda b: b.y):
        # Content zone between the lowest box edge seen so far and this box.
        gap_above = box.y - cursor_y
        if gap_above >= min_zone_height:
            zones.append(PageZone(
                index=len(zones),
                zone_type='content',
                y=cursor_y,
                height=gap_above,
                x=content_x,
                width=content_w,
            ))

        zones.append(PageZone(
            index=len(zones),
            zone_type='box',
            y=box.y,
            height=box.height,
            x=box.x,
            width=box.width,
            box=box,
        ))

        # Never move the cursor upwards: a box that ends above the bottom
        # of an earlier (taller or side-by-side) box must not re-open the
        # region that earlier box already occupies.
        cursor_y = max(cursor_y, box.y + box.height)

    # Content zone below the last box.
    remaining = content_bottom - cursor_y
    if remaining >= min_zone_height:
        zones.append(PageZone(
            index=len(zones),
            zone_type='content',
            y=cursor_y,
            height=remaining,
            x=content_x,
            width=content_w,
        ))

    logger.info(f"ZoneSplit: {len(zones)} zones from {len(boxes)} box(es): "
                f"{[z.zone_type for z in zones]}")

    return zones
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,303 +0,0 @@
|
||||
"""
|
||||
Color detection for OCR word boxes.
|
||||
|
||||
Detects the text color of existing OCR words and recovers colored text
|
||||
regions (e.g. red markers, blue headings) that standard OCR may have missed.
|
||||
|
||||
Standard OCR (Tesseract, PaddleOCR) binarises images before processing,
|
||||
destroying all color information. This module adds it back by sampling
|
||||
HSV pixel values at word-box positions and finding colored regions that
|
||||
no word-box covers.
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HSV color ranges (OpenCV: H 0-180, S 0-255, V 0-255)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Per color name: list of (lower, upper) HSV bound pairs, each bound being
# an [H, S, V] array. All ranges share S >= 70 and V >= 50 as lower limits.
# "red" has two pairs because its hue sits at both ends of the 0-180 hue
# scale. There is no entry for "black" or "gray".
_COLOR_RANGES: Dict[str, List[Tuple[np.ndarray, np.ndarray]]] = {
    "red": [
        (np.array([0, 70, 50]), np.array([10, 255, 255])),
        (np.array([170, 70, 50]), np.array([180, 255, 255])),
    ],
    "orange": [
        (np.array([10, 70, 50]), np.array([25, 255, 255])),
    ],
    "yellow": [
        (np.array([25, 70, 50]), np.array([35, 255, 255])),
    ],
    "green": [
        (np.array([35, 70, 50]), np.array([85, 255, 255])),
    ],
    "blue": [
        (np.array([100, 70, 50]), np.array([130, 255, 255])),
    ],
    "purple": [
        (np.array([130, 70, 50]), np.array([170, 255, 255])),
    ],
}

# Color name -> hex string written into the "color" key of word dicts.
# Includes "black" and "gray", which have no HSV range above.
_COLOR_HEX: Dict[str, str] = {
    "black": "#000000",
    "gray": "#6b7280",
    "red": "#dc2626",
    "orange": "#ea580c",
    "yellow": "#ca8a04",
    "green": "#16a34a",
    "blue": "#2563eb",
    "purple": "#9333ea",
}
|
||||
|
||||
|
||||
def _hue_to_color_name(hue: float) -> str:
|
||||
"""Map OpenCV hue (0-180) to a color name."""
|
||||
if hue < 10 or hue > 170:
|
||||
return "red"
|
||||
if hue < 25:
|
||||
return "orange"
|
||||
if hue < 35:
|
||||
return "yellow"
|
||||
if hue < 85:
|
||||
return "green"
|
||||
if hue < 130:
|
||||
return "blue"
|
||||
return "purple"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Color annotation for existing word boxes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _circular_median_hue(hues: np.ndarray) -> float:
    """Median of hue values on the wrapping 0-180 hue scale.

    A plain median treats hue 2 and hue 178 as far apart although both
    are red, which can yield a result near 90. Here the hues are first
    re-centred around their circular mean, the median is taken on the
    signed offsets, and the result is mapped back into [0, 180).
    For hues that do not straddle the wrap point this equals the plain
    median.

    Returns NaN for an empty input.
    """
    if hues.size == 0:
        return float("nan")
    # 180 hue units correspond to a full circle (2*pi).
    angles = hues.astype(np.float64) * (np.pi / 90.0)
    mean_angle = np.arctan2(np.sin(angles).mean(), np.cos(angles).mean())
    center = int(np.rint(mean_angle * 90.0 / np.pi)) % 180
    # Signed distance of every hue to the centre, in [-90, 89].
    offsets = (hues.astype(np.int64) - center + 90) % 180 - 90
    return float((center + np.median(offsets)) % 180)


def detect_word_colors(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    sat_threshold: int = 70,
    min_sat_ratio: float = 0.25,
) -> None:
    """Annotate every word box in place with its detected text color.

    Each dict in ``word_boxes`` receives two keys: ``color`` (hex string)
    and ``color_name`` (e.g. 'red', 'black').

    Per word:
      1. Crop the word region (clipped to the image).
      2. Build a text mask from an Otsu threshold OR-ed with a
         high-saturation mask.
      3. Sample the background from the 2px border ring of the crop.
      4. If the background is tinted, drop text pixels whose hue is within
         20 of the background hue, so colored box backgrounds do not leak
         into the result.
      5. Classify as colored only when the median saturation reaches
         ``sat_threshold`` and at least ``min_sat_ratio`` of the text
         pixels are saturated; the color name comes from the wrap-aware
         median hue of the saturated pixels.

    Args:
        img_bgr: BGR page image; None makes the call a no-op.
        word_boxes: Dicts with left/top/width/height keys.
        sat_threshold: Saturation above which a pixel counts as colored.
        min_sat_ratio: Minimum share of saturated text pixels.
    """
    if img_bgr is None or not word_boxes:
        return

    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    img_h, img_w = img_bgr.shape[:2]

    colored_count = 0

    for wb in word_boxes:
        x1 = max(0, int(wb["left"]))
        y1 = max(0, int(wb["top"]))
        x2 = min(img_w, int(wb["left"] + wb["width"]))
        y2 = min(img_h, int(wb["top"] + wb["height"]))

        # Degenerate or fully off-image box: nothing to sample.
        if x2 <= x1 or y2 <= y1:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
            continue

        crop_hsv = img_hsv[y1:y2, x1:x2]
        crop_bgr = img_bgr[y1:y2, x1:x2]
        crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
        ch, cw = crop_hsv.shape[:2]

        # Text mask: dark pixels per Otsu threshold, plus saturated pixels.
        _, dark_mask = cv2.threshold(
            crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
        )
        sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
        text_mask = cv2.bitwise_or(dark_mask, sat_mask)

        text_pixels = crop_hsv[text_mask > 0]

        if len(text_pixels) < 3:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
            continue

        # Background subtraction, only for crops large enough to have a
        # border ring distinct from their interior.
        if ch > 6 and cw > 6:
            border = 2
            bg_pixels = np.vstack([
                crop_hsv[:border, :].reshape(-1, 3),
                crop_hsv[-border:, :].reshape(-1, 3),
                crop_hsv[border:-border, :border].reshape(-1, 3),
                crop_hsv[border:-border, -border:].reshape(-1, 3),
            ])

            bg_med_h = float(np.median(bg_pixels[:, 0]))
            bg_med_s = float(np.median(bg_pixels[:, 1]))

            # Tinted background (S > 15): remove text pixels whose hue is
            # close to the background hue (distance measured on the
            # wrapping 0-180 scale).
            if bg_med_s > 15:
                raw_diff = np.abs(text_pixels[:, 0].astype(float) - bg_med_h)
                hue_diff = np.minimum(raw_diff, 180.0 - raw_diff)
                keep = hue_diff > 20
                # If nothing would remain, keep the unfiltered pixels.
                if np.any(keep):
                    text_pixels = text_pixels[keep]

        if len(text_pixels) < 3:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
            continue

        # Classification on medians, which tolerate outlier pixels.
        median_sat = float(np.median(text_pixels[:, 1]))
        sat_count = int(np.sum(text_pixels[:, 1] > sat_threshold))
        sat_ratio = sat_count / len(text_pixels)

        if median_sat < sat_threshold or sat_ratio < min_sat_ratio:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
        else:
            # Hue is taken from saturated pixels only; the median must be
            # wrap-aware so that red hues on both sides of 0/180 are not
            # averaged into an unrelated color.
            sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
            median_hue = _circular_median_hue(sat_pixels[:, 0])
            name = _hue_to_color_name(median_hue)
            wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
            wb["color_name"] = name
            colored_count += 1

    if colored_count:
        logger.info("color annotation: %d / %d words are colored",
                    colored_count, len(word_boxes))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Recover colored text that OCR missed
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def recover_colored_text(
    img_bgr: np.ndarray,
    existing_words: List[Dict],
    min_area: int = 40,
    max_regions: int = 60,
) -> List[Dict]:
    """Recover colored marks that no existing word box covers.

    For every color in ``_COLOR_RANGES`` a mask is built, pixels inside
    (padded) existing word boxes are removed, and the remaining blobs are
    turned into word dicts.

    Args:
        img_bgr: BGR page image; None yields an empty list.
        existing_words: Word dicts with left/top/width/height keys.
        min_area: Minimum contour area for a blob to be considered.
        max_regions: Maximum number of blobs taken per color (largest first).

    Returns:
        Word dicts with text/left/top/width/height/conf plus ``color``,
        ``color_name`` and ``recovered=True``. ``text`` is guessed from
        the blob shape by ``_identify_shape``.
    """
    if img_bgr is None:
        return []

    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    page_h, page_w = img_bgr.shape[:2]
    area_cap = int(page_h * page_w * 0.005)

    # Padding scales with the typical word height (35%, at least 8px) so
    # that colored fringes in narrow gaps between words are masked too.
    word_heights = [w["height"] for w in existing_words if w.get("height", 0) > 0]
    typical_h = int(np.median(word_heights)) if word_heights else 20
    margin = max(8, int(typical_h * 0.35))

    taken = np.zeros((page_h, page_w), dtype=np.uint8)
    for w in existing_words:
        left = max(0, int(w["left"]) - margin)
        top = max(0, int(w["top"]) - margin)
        right = min(page_w, int(w["left"] + w["width"]) + margin)
        bottom = min(page_h, int(w["top"] + w["height"]) + margin)
        taken[top:bottom, left:right] = 255
    free = cv2.bitwise_not(taken)

    # Tall closing kernel joins the stroke and dot of a "!"; the small
    # opening kernel removes isolated specks.
    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 8))
    open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

    recovered: List[Dict] = []

    for color_name, ranges in _COLOR_RANGES.items():
        color_mask = np.zeros((page_h, page_w), dtype=np.uint8)
        for lower, upper in ranges:
            color_mask = cv2.bitwise_or(color_mask, cv2.inRange(hsv, lower, upper))

        # Only pixels outside every padded word box are of interest.
        color_mask = cv2.bitwise_and(color_mask, free)
        color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_CLOSE, close_kernel)
        color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, open_kernel)

        blobs, _ = cv2.findContours(
            color_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
        )

        sized = []
        for blob in blobs:
            blob_area = cv2.contourArea(blob)
            if not (min_area <= blob_area <= area_cap):
                continue
            bx, by, bw, bh = cv2.boundingRect(blob)
            # Skip blobs that are too flat, or too wide for one character.
            if bh < 6 or bw > typical_h * 4:
                continue
            sized.append((blob_area, bx, by, bw, bh))

        # Largest blobs first, capped per color.
        largest = sorted(sized, key=lambda item: item[0], reverse=True)[:max_regions]
        for _blob_area, bx, by, bw, bh in largest:
            recovered.append({
                "text": _identify_shape(bw, bh),
                "left": bx,
                "top": by,
                "width": bw,
                "height": bh,
                "conf": 45,
                "color": _COLOR_HEX.get(color_name, "#000000"),
                "color_name": color_name,
                "recovered": True,
            })

    if recovered:
        tally: Dict[str, int] = {}
        for entry in recovered:
            tally[entry["color_name"]] = tally.get(entry["color_name"], 0) + 1
        logger.info(
            "color recovery: %d colored regions found (%s)",
            len(recovered),
            ", ".join(f"{label}: {tally[label]}" for label in sorted(tally)),
        )

    return recovered
|
||||
|
||||
|
||||
def _identify_shape(w: int, h: int) -> str:
|
||||
"""Simple shape heuristic for common single-character text markers."""
|
||||
aspect = w / h if h > 0 else 1.0
|
||||
if aspect < 0.55 and h > 10:
|
||||
# Tall, narrow — likely exclamation mark
|
||||
return "!"
|
||||
if 0.6 < aspect < 1.5 and max(w, h) < 25:
|
||||
# Small, roughly square — bullet or dot
|
||||
return "•"
|
||||
return "?"
|
||||
@@ -1,313 +0,0 @@
|
||||
"""
|
||||
Graphical element detection for OCR pages.
|
||||
|
||||
Region-based approach:
|
||||
1. Build a color mask (saturation channel — black text is invisible).
|
||||
2. Dilate heavily to merge nearby colored pixels into regions.
|
||||
3. For each region, check overlap with OCR word boxes:
|
||||
- High word overlap → colored text (skip)
|
||||
- Low word overlap → colored graphic / image (keep)
|
||||
4. Separately detect large black-ink illustrations via ink mask.
|
||||
|
||||
Boxes and text colors are handled by cv_box_detect / cv_color_detect.
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ["detect_graphic_elements", "GraphicElement"]
|
||||
|
||||
|
||||
@dataclass
class GraphicElement:
    """A detected non-text graphical element."""
    # Bounding box of the element.
    x: int
    y: int
    width: int
    height: int
    # Size of the element — NOTE(review): contour area vs. bounding-box
    # area is decided by the producer; confirm against detect_graphic_elements.
    area: int
    shape: str  # image, illustration
    color_name: str  # dominant color or 'black'
    # Hex string belonging to color_name.
    color_hex: str
    confidence: float
    # Optional raw contour; defaults to None and is left out of repr().
    contour: Any = field(default=None, repr=False)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Color helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_COLOR_HEX = {
|
||||
"black": "#000000",
|
||||
"gray": "#6b7280",
|
||||
"red": "#dc2626",
|
||||
"orange": "#ea580c",
|
||||
"yellow": "#ca8a04",
|
||||
"green": "#16a34a",
|
||||
"blue": "#2563eb",
|
||||
"purple": "#9333ea",
|
||||
}
|
||||
|
||||
|
||||
def _dominant_color(hsv_roi: np.ndarray, sat_threshold: int = 40) -> tuple:
|
||||
"""Return (color_name, color_hex) for an HSV region."""
|
||||
if hsv_roi.size == 0:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
pixels = hsv_roi.reshape(-1, 3)
|
||||
sat = pixels[:, 1]
|
||||
sat_mask = sat > sat_threshold
|
||||
sat_ratio = np.sum(sat_mask) / len(pixels) if len(pixels) > 0 else 0
|
||||
|
||||
if sat_ratio < 0.15:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
sat_pixels = pixels[sat_mask]
|
||||
if len(sat_pixels) < 3:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
med_hue = float(np.median(sat_pixels[:, 0]))
|
||||
|
||||
if med_hue < 10 or med_hue > 170:
|
||||
name = "red"
|
||||
elif med_hue < 25:
|
||||
name = "orange"
|
||||
elif med_hue < 35:
|
||||
name = "yellow"
|
||||
elif med_hue < 85:
|
||||
name = "green"
|
||||
elif med_hue < 130:
|
||||
name = "blue"
|
||||
else:
|
||||
name = "purple"
|
||||
|
||||
return name, _COLOR_HEX.get(name, _COLOR_HEX["black"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def detect_graphic_elements(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    detected_boxes: Optional[List[Dict]] = None,
    max_elements: int = 50,
) -> List[GraphicElement]:
    """Locate graphical (non-text) regions on a page image.

    Two passes feed one candidate list:

    1. Saturated pixels are dilated into regions; a region is kept as an
       ``"image"`` unless word boxes cover more than half of its bounding
       box (colored text) or it holds fewer than 200 colored pixels.
    2. Dark ink outside word boxes, outside detected boxes and outside
       colored pixels is contoured; large contours become
       ``"illustration"`` elements.

    Candidates are then sorted by ``area`` (largest first) and a candidate
    is dropped when it shares more than half of the smaller bounding box
    with an already kept element.

    Args:
        img_bgr: BGR color image, or None.
        word_boxes: OCR word dicts with left/top/width/height.
        detected_boxes: Optional box dicts (x/y plus w/h or width/height).
        max_elements: Upper bound on the number of returned elements.

    Returns:
        List of GraphicElement, sorted by area descending. Empty when
        ``img_bgr`` is None.
    """
    if img_bgr is None:
        return []

    h, w = img_bgr.shape[:2]

    logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
                 w, h, len(word_boxes), len(detected_boxes or []))

    def _mark_word_boxes(mask: np.ndarray, pad: int) -> None:
        """Fill every OCR word rectangle, grown by ``pad`` px, with 255."""
        for wb in word_boxes:
            left = wb.get("left", 0)
            top = wb.get("top", 0)
            x1 = max(0, int(left) - pad)
            y1 = max(0, int(top) - pad)
            x2 = min(w, int(left + wb.get("width", 0)) + pad)
            y2 = min(h, int(top + wb.get("height", 0)) + pad)
            mask[y1:y2, x1:x2] = 255

    def _covers_half(a: GraphicElement, b: GraphicElement) -> bool:
        """True when the boxes intersect in > 50% of the smaller box area."""
        ix1 = max(a.x, b.x)
        iy1 = max(a.y, b.y)
        ix2 = min(a.x + a.width, b.x + b.width)
        iy2 = min(a.y + a.height, b.y + b.height)
        if ix2 <= ix1 or iy2 <= iy1:
            return False
        inter = (ix2 - ix1) * (iy2 - iy1)
        smaller = min(a.width * a.height, b.width * b.height)
        return smaller > 0 and inter / smaller > 0.5

    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    candidates: List[GraphicElement] = []

    # Unpadded word rectangles, used to measure text coverage of a region.
    word_mask = np.zeros((h, w), dtype=np.uint8)
    _mark_word_boxes(word_mask, 0)

    # ------------------------------------------------------------------
    # Pass 1: colored image regions
    # ------------------------------------------------------------------
    # Colored = saturation above 40 and value below 240.
    saturated = (hsv[:, :, 1] > 40).astype(np.uint8) * 255
    below_white = (hsv[:, :, 2] < 240).astype(np.uint8) * 255
    color_pixels = cv2.bitwise_and(saturated, below_white)

    # A 2x2 opening removes isolated speckle.
    color_pixels = cv2.morphologyEx(
        color_pixels,
        cv2.MORPH_OPEN,
        cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)),
    )

    # Snapshot of the undilated mask; pixel counts are taken from this.
    color_pixel_raw = color_pixels.copy()

    # Heavy dilation merges nearby colored elements into one region.
    # A 25x25 kernel bridges elements within ~12px of each other.
    region_mask = cv2.dilate(
        color_pixels,
        cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25)),
        iterations=1,
    )

    contours_regions, _ = cv2.findContours(
        region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))

    for region in contours_regions:
        bx, by, bw, bh = cv2.boundingRect(region)

        # Too small to be a graphic.
        if bw < 15 or bh < 15:
            continue

        # Spans more than half the page in one direction.
        if bw > w * 0.5 or bh > h * 0.5:
            logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
            continue

        bbox_area = bw * bh
        rows = slice(by, by + bh)
        cols = slice(bx, bx + bw)

        # Share of the bounding box that is covered by word rectangles.
        word_pixel_count = int(np.count_nonzero(word_mask[rows, cols]))
        word_overlap = word_pixel_count / bbox_area if bbox_area > 0 else 0

        # Real (undilated) colored pixels inside the bounding box.
        roi_color = color_pixel_raw[rows, cols]
        color_pixel_count = int(np.count_nonzero(roi_color))

        # Mostly covered by words: colored text, not a graphic.
        if word_overlap > 0.5:
            logger.debug("GraphicDetect PASS1 skip text region (%d,%d) %dx%d overlap=%.0f%%",
                         bx, by, bw, bh, word_overlap * 100)
            continue

        # The dilated area alone is not enough; require real colored pixels.
        if color_pixel_count < 200:
            continue

        # Dominant color is computed over the colored pixels only.
        color_px_mask = roi_color > 0
        if color_px_mask.any():
            color_name, color_hex = _dominant_color(hsv[rows, cols][color_px_mask])
        else:
            color_name, color_hex = "black", _COLOR_HEX["black"]

        # Confidence grows with color density, capped at 0.95.
        density = color_pixel_count / bbox_area if bbox_area > 0 else 0
        conf = min(0.95, 0.5 + density * 0.5)

        logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d overlap=%.0f%% %s",
                     bx, by, bw, bh, color_pixel_count, word_overlap * 100, color_name)
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=color_pixel_count,
            shape="image",
            color_name=color_name, color_hex=color_hex,
            confidence=round(conf, 2), contour=region,
        ))

    # ------------------------------------------------------------------
    # Pass 2: large black-ink illustrations
    # ------------------------------------------------------------------
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, dark_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Exclusion mask: word rectangles padded by 5px plus detected boxes.
    exclusion = np.zeros((h, w), dtype=np.uint8)
    _mark_word_boxes(exclusion, 5)

    if detected_boxes:
        inset = 8  # only the interior, shrunk by 8px per side, is excluded
        for box in detected_boxes:
            box_x = int(box.get("x", 0))
            box_y = int(box.get("y", 0))
            box_w = int(box.get("w", box.get("width", 0)))
            box_h = int(box.get("h", box.get("height", 0)))
            x1 = max(0, box_x + inset)
            y1 = max(0, box_y + inset)
            x2 = min(w, box_x + box_w - inset)
            y2 = min(h, box_y + box_h - inset)
            if x2 > x1 and y2 > y1:
                exclusion[y1:y2, x1:x2] = 255

    ink_only = cv2.bitwise_and(dark_mask, cv2.bitwise_not(exclusion))
    ink_only = cv2.bitwise_and(ink_only, cv2.bitwise_not(color_pixels))

    contours_ink, _ = cv2.findContours(
        ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink))

    for ink in contours_ink:
        area = cv2.contourArea(ink)
        bx, by, bw, bh = cv2.boundingRect(ink)

        if area < 5000 or min(bw, bh) < 40:
            continue
        if bw > w * 0.8 or bh > h * 0.8:
            continue

        logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d",
                     bx, by, bw, bh, int(area))
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=int(area), shape="illustration",
            color_name="black", color_hex="#000000",
            confidence=0.5, contour=ink,
        ))

    # ------------------------------------------------------------------
    # Deduplicate (largest first) and truncate
    # ------------------------------------------------------------------
    candidates.sort(key=lambda g: g.area, reverse=True)

    final: List[GraphicElement] = []
    for cand in candidates:
        if not any(_covers_half(cand, kept) for kept in final):
            final.append(cand)

    result = final[:max_elements]

    if not result:
        logger.info("GraphicDetect: no graphic elements found")
        return result

    shape_counts: Dict[str, int] = {}
    for g in result:
        shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
    logger.info(
        "GraphicDetect: %d elements found (%s)",
        len(result),
        ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
    )
    return result
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,180 +0,0 @@
|
||||
"""
|
||||
Shared types, constants, and availability guards for the CV vocabulary pipeline.
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re # noqa: F401 — re-exported for downstream modules
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np # noqa: F401
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# --- Availability Guards ---
|
||||
|
||||
try:
|
||||
import cv2 # noqa: F401
|
||||
CV2_AVAILABLE = True
|
||||
except ImportError:
|
||||
cv2 = None # type: ignore[assignment]
|
||||
CV2_AVAILABLE = False
|
||||
logger.warning("OpenCV not available — CV pipeline disabled")
|
||||
|
||||
try:
|
||||
import pytesseract # noqa: F401
|
||||
from PIL import Image # noqa: F401
|
||||
TESSERACT_AVAILABLE = True
|
||||
except ImportError:
|
||||
pytesseract = None # type: ignore[assignment]
|
||||
Image = None # type: ignore[assignment,misc]
|
||||
TESSERACT_AVAILABLE = False
|
||||
logger.warning("pytesseract/Pillow not available — CV pipeline disabled")
|
||||
|
||||
CV_PIPELINE_AVAILABLE = CV2_AVAILABLE and TESSERACT_AVAILABLE
|
||||
|
||||
# --- IPA Dictionary ---
|
||||
|
||||
# IPA lookup state. IPA_AVAILABLE becomes True as soon as either source
# below (eng_to_ipa or the Britfone JSON file) is usable.
IPA_AVAILABLE = False
_ipa_convert_american = None
_britfone_dict: Dict[str, str] = {}

# American English: optional eng_to_ipa package.
try:
    import eng_to_ipa as _eng_to_ipa
    _ipa_convert_american = _eng_to_ipa.convert
    IPA_AVAILABLE = True
    logger.info("eng_to_ipa available — American IPA lookup enabled")
except ImportError:
    logger.info("eng_to_ipa not installed — American IPA disabled")

# British English: Britfone dictionary (MIT license, ~15k IPA entries),
# expected at data/britfone_ipa.json next to this module.
_britfone_path = os.path.join(os.path.dirname(__file__), 'data', 'britfone_ipa.json')
if not os.path.exists(_britfone_path):
    logger.info("Britfone not found — British IPA disabled")
else:
    try:
        with open(_britfone_path, 'r', encoding='utf-8') as f:
            _britfone_dict = json.load(f)
        IPA_AVAILABLE = True
        logger.info(f"Britfone loaded — {len(_britfone_dict)} British IPA entries")
    except Exception as e:
        # Best effort: a broken dictionary file only disables British IPA.
        logger.warning(f"Failed to load Britfone: {e}")
|
||||
|
||||
# --- Language Detection Constants ---
|
||||
|
||||
GERMAN_FUNCTION_WORDS = {'der', 'die', 'das', 'und', 'ist', 'ein', 'eine', 'nicht',
|
||||
'von', 'zu', 'mit', 'auf', 'fuer', 'den', 'dem', 'sich', 'auch', 'wird',
|
||||
'nach', 'bei', 'aus', 'wie', 'oder', 'wenn', 'noch', 'aber', 'hat', 'nur',
|
||||
'ueber', 'kann', 'als', 'ich', 'er', 'sie', 'es', 'wir', 'ihr', 'haben',
|
||||
'sein', 'werden', 'war', 'sind', 'muss', 'soll', 'dieser', 'diese', 'diesem'}
|
||||
|
||||
ENGLISH_FUNCTION_WORDS = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'of',
|
||||
'and', 'in', 'that', 'it', 'for', 'on', 'with', 'as', 'at', 'by', 'from',
|
||||
'or', 'but', 'not', 'be', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
|
||||
'would', 'can', 'could', 'should', 'may', 'might', 'this', 'they', 'you', 'he',
|
||||
'she', 'we', 'my', 'your', 'his', 'her', 'its', 'our', 'their', 'which'}
|
||||
|
||||
|
||||
# --- Data Classes ---
|
||||
|
||||
@dataclass
class PageRegion:
    """Rectangular page region together with its classified role."""
    # Role label: 'column_en', 'column_de', 'column_example', 'page_ref',
    # 'column_marker', 'column_text', 'header', 'footer', 'margin_top'
    # or 'margin_bottom'.
    type: str
    x: int
    y: int
    width: int
    height: int
    # Classification confidence in the range 0.0-1.0.
    classification_confidence: float = 1.0
    # How the role was decided: 'content', 'position_enhanced' or
    # 'position_fallback'.
    classification_method: str = ""
|
||||
|
||||
|
||||
@dataclass
class ColumnGeometry:
    """Geometrically detected column, before any type classification."""
    index: int  # 0-based, counted left to right
    x: int
    y: int
    width: int
    height: int
    word_count: int
    # Word dicts from Tesseract (text, conf, left, top, ...).
    words: List[Dict]
    # width / content_width, in the range 0.0-1.0.
    width_ratio: float
    # True if this column was created by a _detect_sub_columns() split.
    is_sub_column: bool = False
|
||||
|
||||
|
||||
@dataclass
class RowGeometry:
    """Geometrically detected row with header/footer classification."""
    index: int  # 0-based, counted top to bottom
    x: int  # absolute left (= content left_x)
    y: int  # absolute y start
    width: int  # content width
    height: int  # row height in px
    word_count: int
    words: List[Dict]
    row_type: str = 'content'  # 'content' | 'header' | 'footer'
    gap_before: int = 0  # gap in px above this row
|
||||
|
||||
|
||||
@dataclass
class VocabRow:
    """One vocabulary entry put together from multi-column OCR output."""
    english: str = ""
    german: str = ""
    example: str = ""
    source_page: str = ""
    confidence: float = 0.0
    y_position: int = 0
|
||||
|
||||
|
||||
@dataclass
class PipelineResult:
    """Full outcome of one CV pipeline run."""
    # Each instance gets its own list/dict via default_factory.
    vocabulary: List[Dict[str, Any]] = field(default_factory=list)
    word_count: int = 0
    columns_detected: int = 0
    duration_seconds: float = 0.0
    stages: Dict[str, float] = field(default_factory=dict)
    error: Optional[str] = None
    image_width: int = 0
    image_height: int = 0
|
||||
|
||||
|
||||
@dataclass
class DocumentTypeResult:
    """Outcome of automatic document type detection."""
    doc_type: str  # 'vocab_table' | 'full_text' | 'generic_table'
    confidence: float  # 0.0-1.0
    pipeline: str  # 'cell_first' | 'full_page'
    # Step names to skip, e.g. ['columns', 'rows'].
    skip_steps: List[str] = field(default_factory=list)
    # Debug information.
    features: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class DetectedBox:
    """Embedded box found on the page (e.g. grammar tip, exercise)."""
    x: int  # absolute pixel position
    y: int
    width: int
    height: int
    confidence: float  # 0.0-1.0
    border_thickness: int = 0
|
||||
|
||||
|
||||
@dataclass
class PageZone:
    """Horizontal page zone: either normal content or a detected box."""
    index: int  # 0-based, counted top to bottom
    zone_type: str  # 'content' | 'box'
    y: int  # absolute pixel y
    height: int
    x: int
    width: int
    box: Optional[DetectedBox] = None
    columns: List[ColumnGeometry] = field(default_factory=list)
|
||||
@@ -1,355 +0,0 @@
|
||||
"""
|
||||
Words-First Grid Builder (Bottom-Up).
|
||||
|
||||
Builds a cell grid from Tesseract word_boxes directly, without requiring
|
||||
pre-detected columns or rows. Algorithm:
|
||||
|
||||
1. Cluster words into columns by X-gap analysis
|
||||
2. Cluster words into rows by Y-proximity
|
||||
3. Build cells at (column, row) intersections
|
||||
|
||||
Returns the same (cells, columns_meta) format as build_cell_grid_v2().
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import statistics
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from cv_ocr_engines import (
|
||||
_group_words_into_lines,
|
||||
_words_to_reading_order_text,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Column clustering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _cluster_columns(
|
||||
words: List[Dict],
|
||||
img_w: int,
|
||||
min_gap_pct: float = 3.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Cluster words into columns by finding large horizontal gaps.
|
||||
|
||||
Returns a list of column dicts:
|
||||
[{'index': 0, 'type': 'column_1', 'x_min': ..., 'x_max': ...}, ...]
|
||||
sorted left-to-right.
|
||||
"""
|
||||
if not words:
|
||||
return []
|
||||
|
||||
# Sort by X center
|
||||
sorted_w = sorted(words, key=lambda w: w['left'] + w['width'] / 2)
|
||||
|
||||
# Collect word heights to compute adaptive threshold
|
||||
heights = [w['height'] for w in sorted_w if w.get('height', 0) > 0]
|
||||
median_h = statistics.median(heights) if heights else 30
|
||||
|
||||
# Adaptive gap threshold: 3× median word height, but at least min_gap_pct of image width
|
||||
min_gap_px = max(median_h * 3, img_w * min_gap_pct / 100) if img_w > 0 else median_h * 3
|
||||
|
||||
# Find X-gap boundaries between consecutive words (sorted by X-center)
|
||||
# For each word, compute right edge; for next word, compute left edge
|
||||
boundaries: List[float] = [] # X positions where columns split
|
||||
for i in range(len(sorted_w) - 1):
|
||||
right_edge = sorted_w[i]['left'] + sorted_w[i]['width']
|
||||
left_edge = sorted_w[i + 1]['left']
|
||||
gap = left_edge - right_edge
|
||||
if gap > min_gap_px:
|
||||
# Split point is midway through the gap
|
||||
boundaries.append((right_edge + left_edge) / 2)
|
||||
|
||||
# Build column ranges from boundaries
|
||||
# Column ranges: (-inf, boundary[0]), (boundary[0], boundary[1]), ..., (boundary[-1], +inf)
|
||||
col_edges = [0.0] + boundaries + [float(img_w)]
|
||||
columns = []
|
||||
for ci in range(len(col_edges) - 1):
|
||||
columns.append({
|
||||
'index': ci,
|
||||
'type': f'column_{ci + 1}' if len(col_edges) > 2 else 'column_text',
|
||||
'x_min': col_edges[ci],
|
||||
'x_max': col_edges[ci + 1],
|
||||
})
|
||||
|
||||
return columns
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Row clustering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _cluster_rows(
|
||||
words: List[Dict],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Cluster words into visual rows by Y-proximity.
|
||||
|
||||
Uses half the median word height as Y-tolerance.
|
||||
|
||||
Returns a list of row dicts:
|
||||
[{'index': 0, 'y_min': ..., 'y_max': ..., 'y_center': ...}, ...]
|
||||
sorted top-to-bottom.
|
||||
"""
|
||||
if not words:
|
||||
return []
|
||||
|
||||
heights = [w['height'] for w in words if w.get('height', 0) > 0]
|
||||
median_h = statistics.median(heights) if heights else 20
|
||||
y_tol = max(median_h * 0.5, 5)
|
||||
|
||||
lines = _group_words_into_lines(words, y_tolerance_px=int(y_tol))
|
||||
|
||||
rows = []
|
||||
for ri, line_words in enumerate(lines):
|
||||
y_min = min(w['top'] for w in line_words)
|
||||
y_max = max(w['top'] + w['height'] for w in line_words)
|
||||
rows.append({
|
||||
'index': ri,
|
||||
'y_min': y_min,
|
||||
'y_max': y_max,
|
||||
'y_center': (y_min + y_max) / 2,
|
||||
})
|
||||
|
||||
return rows
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. Build cells
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _assign_word_to_column(word: Dict, columns: List[Dict]) -> int:
|
||||
"""Return column index for a word based on its X-center."""
|
||||
x_center = word['left'] + word['width'] / 2
|
||||
for col in columns:
|
||||
if col['x_min'] <= x_center < col['x_max']:
|
||||
return col['index']
|
||||
# Fallback: nearest column
|
||||
return min(columns, key=lambda c: abs((c['x_min'] + c['x_max']) / 2 - x_center))['index']
|
||||
|
||||
|
||||
def _assign_word_to_row(word: Dict, rows: List[Dict]) -> int:
|
||||
"""Return row index for a word based on its Y-center.
|
||||
|
||||
When rows overlap (e.g. due to tall border-ghost characters inflating
|
||||
a row's y_max), prefer the row whose y_center is closest.
|
||||
"""
|
||||
y_center = word['top'] + word['height'] / 2
|
||||
# Find all rows whose y_range contains this word's center
|
||||
matching = [r for r in rows if r['y_min'] <= y_center <= r['y_max']]
|
||||
if matching:
|
||||
return min(matching, key=lambda r: abs(r['y_center'] - y_center))['index']
|
||||
# Fallback: nearest row by Y-center
|
||||
return min(rows, key=lambda r: abs(r['y_center'] - y_center))['index']
|
||||
|
||||
|
||||
def _build_cells(
|
||||
words: List[Dict],
|
||||
columns: List[Dict],
|
||||
rows: List[Dict],
|
||||
img_w: int,
|
||||
img_h: int,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Build cell dicts from word assignments to (column, row) pairs."""
|
||||
if not columns or not rows:
|
||||
return []
|
||||
|
||||
# Bucket words into (col_idx, row_idx)
|
||||
buckets: Dict[Tuple[int, int], List[Dict]] = {}
|
||||
for w in words:
|
||||
ci = _assign_word_to_column(w, columns)
|
||||
ri = _assign_word_to_row(w, rows)
|
||||
buckets.setdefault((ci, ri), []).append(w)
|
||||
|
||||
cells = []
|
||||
for (ci, ri), cell_words in sorted(buckets.items(), key=lambda kv: (kv[0][1], kv[0][0])):
|
||||
col = columns[ci]
|
||||
row = rows[ri]
|
||||
|
||||
# Compute tight bbox from actual word positions
|
||||
x_min = min(w['left'] for w in cell_words)
|
||||
y_min = min(w['top'] for w in cell_words)
|
||||
x_max = max(w['left'] + w['width'] for w in cell_words)
|
||||
y_max = max(w['top'] + w['height'] for w in cell_words)
|
||||
bw = x_max - x_min
|
||||
bh = y_max - y_min
|
||||
|
||||
# Text from words in reading order
|
||||
text = _words_to_reading_order_text(cell_words, y_tolerance_px=max(10, int(bh * 0.4)))
|
||||
|
||||
# Average confidence
|
||||
confs = [w.get('conf', 0) for w in cell_words if w.get('conf', 0) > 0]
|
||||
avg_conf = sum(confs) / len(confs) if confs else 0.0
|
||||
|
||||
# Word boxes with absolute pixel coordinates (consistent with cv_cell_grid.py).
|
||||
# PaddleOCR returns phrase-level boxes (e.g. "competition [kompa'tifn]"),
|
||||
# but the overlay slide mechanism expects one box per word. Split multi-word
|
||||
# boxes into individual word positions proportional to character length.
|
||||
# Also split at "[" boundaries (IPA patterns like "badge[bxd3]").
|
||||
#
|
||||
# Sort in reading order: group by Y (same visual line), then sort by X.
|
||||
# Simple (top, left) sort fails when words on the same line have slightly
|
||||
# different top values (1-6px), causing wrong word order.
|
||||
y_tol_wb = max(10, int(bh * 0.4))
|
||||
reading_lines = _group_words_into_lines(cell_words, y_tolerance_px=y_tol_wb)
|
||||
ordered_cell_words = [w for line in reading_lines for w in line]
|
||||
|
||||
word_boxes = []
|
||||
for w in ordered_cell_words:
|
||||
raw_text = w.get('text', '').strip()
|
||||
# Split by whitespace, at "[" boundaries (IPA), and after leading "!"
|
||||
# e.g. "badge[bxd3]" → ["badge", "[bxd3]"]
|
||||
# e.g. "profit['proft]" → ["profit", "['proft]"]
|
||||
# e.g. "!Betonung" → ["!", "Betonung"]
|
||||
tokens = re.split(r'\s+|(?=\[)|(?<=!)(?=[A-Za-z\u00c0-\u024f])', raw_text)
|
||||
tokens = [t for t in tokens if t] # remove empty strings
|
||||
if len(tokens) <= 1:
|
||||
# Single word — keep as-is
|
||||
word_boxes.append({
|
||||
'text': raw_text,
|
||||
'left': w['left'],
|
||||
'top': w['top'],
|
||||
'width': w['width'],
|
||||
'height': w['height'],
|
||||
'conf': w.get('conf', 0),
|
||||
})
|
||||
else:
|
||||
# Multi-word phrase — split proportionally by character count
|
||||
total_chars = sum(len(t) for t in tokens)
|
||||
if total_chars == 0:
|
||||
continue
|
||||
# Small gap between words (2% of box width per gap)
|
||||
n_gaps = len(tokens) - 1
|
||||
gap_px = w['width'] * 0.02
|
||||
usable_w = w['width'] - gap_px * n_gaps
|
||||
cursor = w['left']
|
||||
for t in tokens:
|
||||
token_w = max(1, usable_w * len(t) / total_chars)
|
||||
word_boxes.append({
|
||||
'text': t,
|
||||
'left': round(cursor),
|
||||
'top': w['top'],
|
||||
'width': round(token_w),
|
||||
'height': w['height'],
|
||||
'conf': w.get('conf', 0),
|
||||
})
|
||||
cursor += token_w + gap_px
|
||||
|
||||
cells.append({
|
||||
'cell_id': f"R{ri:02d}_C{ci}",
|
||||
'row_index': ri,
|
||||
'col_index': ci,
|
||||
'col_type': col['type'],
|
||||
'text': text,
|
||||
'confidence': round(avg_conf, 1),
|
||||
'bbox_px': {'x': x_min, 'y': y_min, 'w': bw, 'h': bh},
|
||||
'bbox_pct': {
|
||||
'x': round(x_min / img_w * 100, 2) if img_w else 0,
|
||||
'y': round(y_min / img_h * 100, 2) if img_h else 0,
|
||||
'w': round(bw / img_w * 100, 2) if img_w else 0,
|
||||
'h': round(bh / img_h * 100, 2) if img_h else 0,
|
||||
},
|
||||
'word_boxes': word_boxes,
|
||||
'ocr_engine': 'words_first',
|
||||
'is_bold': False,
|
||||
})
|
||||
|
||||
return cells
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4. Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_grid_from_words(
    word_dicts: List[Dict],
    img_w: int,
    img_h: int,
    min_confidence: int = 30,
    box_rects: Optional[List[Dict]] = None,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Build a cell grid bottom-up from OCR word boxes.

    Steps: drop low-confidence and blank words, drop words whose center
    lies inside any of ``box_rects``, cluster columns, cluster rows,
    then build cells.

    Args:
        word_dicts: Flat list of word dicts with keys
            text, left, top, width, height, conf
            (absolute pixel coordinates).
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        min_confidence: Minimum OCR confidence to keep a word.
        box_rects: Optional list of box dicts with keys x, y, width,
            height. Words centered inside a box are excluded (box
            columns are detected separately).

    Returns:
        (cells, columns_meta): same format as build_cell_grid_v2().
        cells: list of cell dicts with cell_id, bbox_px, bbox_pct, etc.
        columns_meta: list of {'index', 'type', 'x', 'width'} dicts.
        Both lists are empty when no word survives the filters.
    """
    if not word_dicts:
        logger.info("build_grid_from_words: no words — returning empty grid")
        return [], []

    # Confidence / non-blank filter.
    words = [
        wd for wd in word_dicts
        if wd.get('conf', 0) >= min_confidence and wd.get('text', '').strip()
    ]
    if not words:
        logger.info("build_grid_from_words: all words filtered (conf < %d)", min_confidence)
        return [], []

    logger.info("build_grid_from_words: %d words (after confidence filter from %d)", len(words), len(word_dicts))

    if box_rects:
        def _centered_in_box(wd: Dict) -> bool:
            """True when the word's center lies inside any box rect."""
            cx = wd['left'] + wd['width'] / 2
            cy = wd['top'] + wd['height'] / 2
            return any(
                b['x'] <= cx <= b['x'] + b['width']
                and b['y'] <= cy <= b['y'] + b['height']
                for b in box_rects
            )

        content_words = [wd for wd in words if not _centered_in_box(wd)]
        excluded = len(words) - len(content_words)
        if excluded:
            logger.info("build_grid_from_words: excluded %d words inside %d box(es)",
                        excluded, len(box_rects))
        words = content_words
        if not words:
            logger.info("build_grid_from_words: all words inside boxes — returning empty grid")
            return [], []

    # Step 1: columns
    columns = _cluster_columns(words, img_w)
    logger.info("build_grid_from_words: %d column(s) detected", len(columns))

    # Step 2: rows
    rows = _cluster_rows(words)
    logger.info("build_grid_from_words: %d row(s) detected", len(rows))

    # Step 3: cells
    cells = _build_cells(words, columns, rows, img_w, img_h)
    logger.info("build_grid_from_words: %d cells built", len(cells))

    # Column metadata in the build_cell_grid_v2 layout (integer x / width).
    columns_meta = [
        {
            'index': col['index'],
            'type': col['type'],
            'x': int(col['x_min']),
            'width': int(col['x_max'] - col['x_min']),
        }
        for col in columns
    ]

    return cells, columns_meta
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -1,276 +0,0 @@
|
||||
"""
|
||||
Handwriting HTR API - Hochwertige Handschriftenerkennung (HTR) fuer Klausurkorrekturen.
|
||||
|
||||
Endpoints:
|
||||
- POST /api/v1/htr/recognize - Bild hochladen → handgeschriebener Text
|
||||
- POST /api/v1/htr/recognize-session - OCR-Pipeline Session als Quelle nutzen
|
||||
|
||||
Modell-Strategie:
|
||||
1. qwen2.5vl:32b via Ollama (primaer, hoechste Qualitaet als VLM)
|
||||
2. microsoft/trocr-large-handwritten (Fallback, offline, kein Ollama)
|
||||
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal auf dem Mac Mini.
|
||||
"""
|
||||
|
||||
import io
|
||||
import os
|
||||
import logging
|
||||
import time
|
||||
import base64
|
||||
from typing import Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, HTTPException, Query, UploadFile, File
|
||||
from pydantic import BaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/htr", tags=["HTR"])
|
||||
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
|
||||
OLLAMA_HTR_MODEL = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")
|
||||
HTR_FALLBACK_MODEL = os.getenv("HTR_FALLBACK_MODEL", "trocr-large")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pydantic Models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class HTRSessionRequest(BaseModel):
    """Request body naming an OCR-pipeline session to run HTR on."""
    session_id: str
    # Backend selector: "auto" | "qwen2.5vl" | "trocr-large"
    model: str = "auto"
    # Prefer clean_png (the image after handwriting removal).
    use_clean: bool = True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Preprocessing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _preprocess_for_htr(img_bgr: np.ndarray) -> np.ndarray:
    """
    Enhance contrast with CLAHE and upscale small images for HTR.

    The image is converted to grayscale, equalized with CLAHE
    (clipLimit 2.0, 8x8 tiles) and, when its shorter side is below
    800 px, resized with cubic interpolation so that side becomes 800 px.

    Returns the enhanced grayscale image.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    enhanced = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)

    n_rows, n_cols = enhanced.shape
    short_side = min(n_rows, n_cols)
    if short_side >= 800:
        return enhanced

    # Too small: scale both axes so the shorter side reaches 800 px.
    scale = 800 / short_side
    return cv2.resize(
        enhanced, None, fx=scale, fy=scale,
        interpolation=cv2.INTER_CUBIC
    )
|
||||
|
||||
|
||||
def _bgr_to_png_bytes(img_bgr: np.ndarray) -> bytes:
    """Encode a BGR ndarray as PNG and return the raw bytes.

    Raises:
        RuntimeError: if OpenCV reports that encoding failed.
    """
    ok, encoded = cv2.imencode(".png", img_bgr)
    if ok:
        return encoded.tobytes()
    raise RuntimeError("Failed to encode image to PNG")
|
||||
|
||||
|
||||
def _preprocess_image_bytes(image_bytes: bytes) -> bytes:
    """
    Decode an encoded image, run the HTR preprocessing on it and re-encode
    the result as PNG.

    Raises:
        ValueError: if the bytes cannot be decoded as an image.
    """
    raw_buffer = np.frombuffer(image_bytes, dtype=np.uint8)
    decoded = cv2.imdecode(raw_buffer, cv2.IMREAD_COLOR)
    if decoded is None:
        raise ValueError("Could not decode image")

    gray_enhanced = _preprocess_for_htr(decoded)

    # The preprocessing step yields grayscale; expand it to three channels
    # again before handing it to the PNG encoder.
    return _bgr_to_png_bytes(cv2.cvtColor(gray_enhanced, cv2.COLOR_GRAY2BGR))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backend: Ollama qwen2.5vl
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _recognize_with_qwen_vl(image_bytes: bytes, language: str) -> Optional[str]:
    """
    Ask the Ollama vision model (``OLLAMA_HTR_MODEL``) to transcribe the
    handwriting in an image.

    The image is sent base64-encoded to ``{OLLAMA_BASE_URL}/api/generate``
    together with a German prompt that names the expected language and
    tells the model not to correct spelling. Unknown ``language`` values
    fall back to the German hint.

    Returns the stripped ``response`` field of the reply, or ``None`` when
    the request or the parsing of the reply fails (the error is logged as
    a warning).
    """
    import httpx

    language_names = {
        "de": "Deutsch",
        "en": "Englisch",
        "de+en": "Deutsch und Englisch",
    }
    lang_hint = language_names.get(language, "Deutsch")

    prompt = "".join([
        f"Du bist ein OCR-Experte fuer handgeschriebenen Text auf {lang_hint}. ",
        "Lies den Text im Bild exakt ab — korrigiere KEINE Rechtschreibfehler. ",
        "Antworte NUR mit dem erkannten Text, ohne Erklaerungen.",
    ])

    request_body = {
        "model": OLLAMA_HTR_MODEL,
        "prompt": prompt,
        "images": [base64.b64encode(image_bytes).decode("utf-8")],
        "stream": False,
    }

    try:
        # Generous timeout: a single request may take up to two minutes.
        async with httpx.AsyncClient(timeout=120.0) as client:
            reply = await client.post(f"{OLLAMA_BASE_URL}/api/generate", json=request_body)
            reply.raise_for_status()
            return reply.json().get("response", "").strip()
    except Exception as e:
        # Best effort: any failure is reported as "no result" so the caller
        # can try another backend.
        logger.warning(f"Ollama qwen2.5vl HTR failed: {e}")
        return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backend: TrOCR-large fallback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _recognize_with_trocr_large(image_bytes: bytes) -> Optional[str]:
    """
    Transcribe handwriting with TrOCR through ``services.trocr_service``
    (called with ``handwritten=True`` and ``size="large"``).

    Returns the stripped text, or ``None`` when TrOCR is unavailable,
    yields no text, or raises (failures are logged as warnings).
    """
    try:
        # Imported lazily so a missing TrOCR service is handled like any
        # other failure of this backend.
        from services.trocr_service import run_trocr_ocr, _check_trocr_available

        if _check_trocr_available():
            recognized, _confidence = await run_trocr_ocr(
                image_bytes, handwritten=True, size="large"
            )
            return recognized.strip() if recognized else None

        logger.warning("TrOCR not available for HTR fallback")
        return None
    except Exception as e:
        logger.warning(f"TrOCR-large HTR failed: {e}")
        return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core recognition logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _do_recognize(
    image_bytes: bytes,
    model: str = "auto",
    preprocess: bool = True,
    language: str = "de",
) -> dict:
    """
    Core HTR logic: preprocess → try Ollama → fall back to TrOCR-large.

    Args:
        image_bytes: Encoded image to transcribe.
        model: "auto" or "qwen2.5vl" try the Ollama backend first and fall
            back to TrOCR when it returns no result; "trocr-large" uses
            TrOCR only. Any other value selects no backend.
        preprocess: Whether to run the HTR preprocessing first. If it
            fails, recognition continues on the unmodified image.
        language: Language hint passed to the Ollama backend.

    Returns:
        Dict with ``text`` (empty string when nothing was recognized),
        ``model_used``, ``processing_time_ms``, ``language`` and
        ``preprocessed``. ``preprocessed`` is true only when preprocessing
        was requested AND succeeded.
    """
    t0 = time.monotonic()

    # Track whether preprocessing was really applied, so the response does
    # not claim "preprocessed" when the raw image had to be used.
    preprocessed = False
    if preprocess:
        try:
            image_bytes = _preprocess_image_bytes(image_bytes)
            preprocessed = True
        except Exception as e:
            logger.warning(f"HTR preprocessing failed, using raw image: {e}")

    text: Optional[str] = None
    model_used: str = "none"

    use_qwen = model in ("auto", "qwen2.5vl")
    # TrOCR is the primary backend for "trocr-large" and the fallback
    # whenever the Ollama backend was selected but produced no result.
    use_trocr = use_qwen or model == "trocr-large"

    if use_qwen:
        text = await _recognize_with_qwen_vl(image_bytes, language)
        if text is not None:
            model_used = f"qwen2.5vl ({OLLAMA_HTR_MODEL})"

    if text is None and use_trocr:
        text = await _recognize_with_trocr_large(image_bytes)
        if text is not None:
            model_used = "trocr-large-handwritten"

    if text is None:
        text = ""
        model_used = "none (all backends failed)"

    elapsed_ms = int((time.monotonic() - t0) * 1000)

    return {
        "text": text,
        "model_used": model_used,
        "processing_time_ms": elapsed_ms,
        "language": language,
        "preprocessed": preprocessed,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/recognize")
async def recognize_handwriting(
    file: UploadFile = File(...),
    model: str = Query("auto", description="auto | qwen2.5vl | trocr-large"),
    preprocess: bool = Query(True, description="Apply CLAHE + upscale before recognition"),
    language: str = Query("de", description="de | en | de+en"),
):
    """
    Upload an image and get back the handwritten text as plain text.

    Tries qwen2.5vl:32b via Ollama first, falls back to TrOCR-large-handwritten.
    """
    # The docstring above is kept verbatim because FastAPI publishes it as
    # the endpoint description.

    # Reject unsupported option values with HTTP 400 before reading the upload.
    allowed_models = ("auto", "qwen2.5vl", "trocr-large")
    allowed_languages = ("de", "en", "de+en")
    if model not in allowed_models:
        raise HTTPException(status_code=400, detail="model must be one of: auto, qwen2.5vl, trocr-large")
    if language not in allowed_languages:
        raise HTTPException(status_code=400, detail="language must be one of: de, en, de+en")

    uploaded = await file.read()
    if not uploaded:
        raise HTTPException(status_code=400, detail="Empty file")

    # The response is the dict built by the core recognition routine.
    return await _do_recognize(
        uploaded,
        model=model,
        preprocess=preprocess,
        language=language,
    )
|
||||
|
||||
|
||||
@router.post("/recognize-session")
async def recognize_from_session(req: HTRSessionRequest):
    """
    Use an OCR-Pipeline session as image source for HTR.

    Set use_clean=true to prefer the clean image (after handwriting removal step).
    This is useful when you want to do HTR on isolated handwriting regions.

    Image selection order: "clean" (only when use_clean is true), then
    "deskewed", then "original"; the first one that exists is used.

    Responses:
        400 if ``model`` is not one of auto, qwen2.5vl, trocr-large.
        404 if the session does not exist or holds none of the images.
        Otherwise the recognition result, extended with ``session_id`` and
        ``source_image`` (the name of the image that was used).
    """
    from ocr_pipeline_session_store import get_session_db, get_session_image

    # Same validation as POST /recognize: without it an unknown model would
    # select no backend and come back as a misleading
    # "none (all backends failed)" result with HTTP 200.
    if req.model not in ("auto", "qwen2.5vl", "trocr-large"):
        raise HTTPException(status_code=400, detail="model must be one of: auto, qwen2.5vl, trocr-large")

    session = await get_session_db(req.session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {req.session_id} not found")

    # Candidate images in order of preference.
    candidates = ["deskewed", "original"]
    if req.use_clean:
        candidates.insert(0, "clean")

    image_bytes: Optional[bytes] = None
    source_used: str = ""
    for image_type in candidates:
        image_bytes = await get_session_image(req.session_id, image_type)
        if image_bytes:
            # Record the source only once an image was actually found.
            source_used = image_type
            break

    if not image_bytes:
        raise HTTPException(status_code=404, detail="No image available in session")

    result = await _do_recognize(image_bytes, model=req.model)
    result["session_id"] = req.session_id
    result["source_image"] = source_used
    return result
|
||||
@@ -42,14 +42,6 @@ try:
|
||||
except ImportError:
|
||||
trocr_router = None
|
||||
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
|
||||
from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeline_cache
|
||||
from grid_editor_api import router as grid_editor_router
|
||||
from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
|
||||
from ocr_pipeline_session_store import init_ocr_pipeline_tables
|
||||
try:
|
||||
from handwriting_htr_api import router as htr_router
|
||||
except ImportError:
|
||||
htr_router = None
|
||||
try:
|
||||
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
|
||||
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
|
||||
@@ -83,13 +75,6 @@ async def lifespan(app: FastAPI):
|
||||
except Exception as e:
|
||||
print(f"Warning: Vocab sessions database initialization failed: {e}")
|
||||
|
||||
# Initialize OCR Pipeline session tables
|
||||
try:
|
||||
await init_ocr_pipeline_tables()
|
||||
print("OCR Pipeline session tables initialized")
|
||||
except Exception as e:
|
||||
print(f"Warning: OCR Pipeline tables initialization failed: {e}")
|
||||
|
||||
# Initialize database pool for DSFA RAG
|
||||
dsfa_db_pool = None
|
||||
if DSFA_DATABASE_URL and set_dsfa_db_pool:
|
||||
@@ -119,19 +104,6 @@ async def lifespan(app: FastAPI):
|
||||
# Ensure EH upload directory exists
|
||||
os.makedirs(EH_UPLOAD_DIR, exist_ok=True)
|
||||
|
||||
# Preload LightOnOCR model if OCR_ENGINE=lighton (avoids cold-start on first request)
|
||||
ocr_engine_env = os.getenv("OCR_ENGINE", "auto")
|
||||
if ocr_engine_env == "lighton":
|
||||
try:
|
||||
import asyncio
|
||||
from services.lighton_ocr_service import get_lighton_model
|
||||
loop = asyncio.get_event_loop()
|
||||
print("Preloading LightOnOCR-2-1B at startup (OCR_ENGINE=lighton)...")
|
||||
await loop.run_in_executor(None, get_lighton_model)
|
||||
print("LightOnOCR-2-1B preloaded")
|
||||
except Exception as e:
|
||||
print(f"Warning: LightOnOCR preload failed: {e}")
|
||||
|
||||
yield
|
||||
|
||||
print("Klausur-Service shutting down...")
|
||||
@@ -178,12 +150,6 @@ app.include_router(mail_router) # Unified Inbox Mail
|
||||
if trocr_router:
|
||||
app.include_router(trocr_router) # TrOCR Handwriting OCR
|
||||
app.include_router(vocab_router) # Vocabulary Worksheet Generator
|
||||
app.include_router(ocr_pipeline_router) # OCR Pipeline (step-by-step)
|
||||
app.include_router(grid_editor_router) # Grid Editor (Excel-like)
|
||||
set_orientation_crop_cache(ocr_pipeline_cache)
|
||||
app.include_router(orientation_crop_router) # OCR Pipeline: Orientation + Crop
|
||||
if htr_router:
|
||||
app.include_router(htr_router) # Handwriting HTR (Klausur)
|
||||
if dsfa_rag_router:
|
||||
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search
|
||||
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
-- OCR Pipeline Sessions: persistent session storage.
-- Applied automatically by ocr_pipeline_session_store.init_ocr_pipeline_tables().
-- Every statement uses IF NOT EXISTS, so the script can be run repeatedly.

CREATE TABLE IF NOT EXISTS ocr_pipeline_sessions (
    -- Identity and progress
    id                 UUID         PRIMARY KEY,
    name               VARCHAR(255) NOT NULL,
    filename           VARCHAR(255),
    status             VARCHAR(50)  DEFAULT 'active',
    current_step       INT          DEFAULT 1,

    -- Binary image columns
    -- NOTE(review): names suggest one PNG per processing stage; confirm
    -- against the session store.
    original_png       BYTEA,
    deskewed_png       BYTEA,
    binarized_png      BYTEA,
    dewarped_png       BYTEA,

    -- JSON results and annotations
    deskew_result      JSONB,
    dewarp_result      JSONB,
    column_result      JSONB,
    ground_truth       JSONB        DEFAULT '{}',
    auto_shear_degrees FLOAT,

    -- Row timestamps, defaulting to the insertion time
    created_at         TIMESTAMP    DEFAULT NOW(),
    updated_at         TIMESTAMP    DEFAULT NOW()
);

-- Index for listing sessions (newest first).
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_created
    ON ocr_pipeline_sessions (created_at DESC);

-- Index on the session status column.
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_status
    ON ocr_pipeline_sessions (status);
|
||||
@@ -1,4 +0,0 @@
|
||||
-- Migration 003: add the row_result column for row geometry detection.
-- The column stores the detected row geometries, including the
-- header/footer classification.
-- Re-runnable: the column is only added when it does not exist yet.

ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS row_result JSONB;
|
||||
@@ -1,4 +0,0 @@
|
||||
-- Migration 004: add the word_result column for OCR Pipeline Step 5.
-- The column stores the word recognition grid result
-- (entries with english/german/example + bboxes).
-- Re-runnable: the column is only added when it does not exist yet.

ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS word_result JSONB;
|
||||
@@ -1,7 +0,0 @@
|
||||
-- Migration 005: add the document type detection columns.
-- They hold the outcome of the automatic document type detection
-- (vocab_table, full_text, generic_table) that runs after dewarp.
-- Re-runnable: each column is only added when it does not exist yet.

ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS doc_type        VARCHAR(50),
    ADD COLUMN IF NOT EXISTS doc_type_result JSONB;
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user