Compare commits
25 Commits
coolify
...
c7ae44ff17
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c7ae44ff17 | ||
|
|
ce0815007e | ||
|
|
b03cb0a1e6 | ||
|
|
5a45cbf605 | ||
|
|
164b35c06a | ||
|
|
2297f66edb | ||
|
|
db8327f039 | ||
|
|
587b066a40 | ||
|
|
03fa186fec | ||
|
|
1040729874 | ||
|
|
4f37afa222 | ||
|
|
bb879a03a8 | ||
|
|
f535d3c967 | ||
|
|
7a3570fe46 | ||
|
|
1393a994f9 | ||
|
|
cf27a95308 | ||
|
|
aa06ae0f61 | ||
|
|
09b820efbe | ||
|
|
ff2bb79a91 | ||
|
|
fb496c5e34 | ||
|
|
9df745574b | ||
|
|
44e8c573af | ||
|
|
589d2f811a | ||
|
|
d552fd8b6b | ||
|
|
e7b6654b85 |
@@ -6,19 +6,26 @@
|
|||||||
|
|
||||||
| Geraet | Rolle | Aufgaben |
|
| Geraet | Rolle | Aufgaben |
|
||||||
|--------|-------|----------|
|
|--------|-------|----------|
|
||||||
| **MacBook** | Client | Claude Terminal, Browser (Frontend-Tests) |
|
| **MacBook** | Entwicklung | Claude Terminal, Code-Entwicklung, Browser (Frontend-Tests) |
|
||||||
| **Mac Mini** | Server | Docker, alle Services, Code-Ausfuehrung, Tests, Git |
|
| **Mac Mini** | Server | Docker, alle Services, Tests, Builds, Deployment |
|
||||||
|
|
||||||
**WICHTIG:** Die Entwicklung findet vollstaendig auf dem **Mac Mini** statt!
|
**WICHTIG:** Code wird direkt auf dem MacBook in diesem Repo bearbeitet. Docker und Services laufen auf dem Mac Mini.
|
||||||
|
|
||||||
### SSH-Verbindung
|
### Entwicklungsworkflow
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ssh macmini
|
# 1. Code auf MacBook bearbeiten (dieses Verzeichnis)
|
||||||
# Projektverzeichnis:
|
# 2. Committen und pushen:
|
||||||
cd /Users/benjaminadmin/Projekte/breakpilot-lehrer
|
git push origin main && git push gitea main
|
||||||
|
|
||||||
# Einzelbefehle (BEVORZUGT):
|
# 3. Auf Mac Mini pullen und Container neu bauen:
|
||||||
|
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && git pull --no-rebase origin main"
|
||||||
|
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && /usr/local/bin/docker compose build --no-cache <service> && /usr/local/bin/docker compose up -d <service>"
|
||||||
|
```
|
||||||
|
|
||||||
|
### SSH-Verbindung (fuer Docker/Tests)
|
||||||
|
|
||||||
|
```bash
|
||||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && <cmd>"
|
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && <cmd>"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -1,79 +0,0 @@
|
|||||||
# =========================================================
|
|
||||||
# BreakPilot Lehrer — Coolify Environment Variables
|
|
||||||
# =========================================================
|
|
||||||
# Copy these into Coolify's environment variable UI
|
|
||||||
# for the breakpilot-lehrer Docker Compose resource.
|
|
||||||
# =========================================================
|
|
||||||
|
|
||||||
# --- External PostgreSQL (Coolify-managed, same as Core) ---
|
|
||||||
POSTGRES_HOST=<coolify-postgres-hostname>
|
|
||||||
POSTGRES_PORT=5432
|
|
||||||
POSTGRES_USER=breakpilot
|
|
||||||
POSTGRES_PASSWORD=CHANGE_ME_SAME_AS_CORE
|
|
||||||
POSTGRES_DB=breakpilot_db
|
|
||||||
|
|
||||||
# --- Security ---
|
|
||||||
JWT_SECRET=CHANGE_ME_SAME_AS_CORE
|
|
||||||
|
|
||||||
# --- External S3 Storage (same as Core) ---
|
|
||||||
S3_ENDPOINT=<s3-endpoint-host:port>
|
|
||||||
S3_ACCESS_KEY=CHANGE_ME_SAME_AS_CORE
|
|
||||||
S3_SECRET_KEY=CHANGE_ME_SAME_AS_CORE
|
|
||||||
S3_BUCKET=breakpilot-rag
|
|
||||||
S3_SECURE=true
|
|
||||||
|
|
||||||
# --- External Qdrant (Coolify-managed, same as Core) ---
|
|
||||||
QDRANT_URL=http://<coolify-qdrant-hostname>:6333
|
|
||||||
|
|
||||||
# --- Session ---
|
|
||||||
SESSION_TTL_HOURS=24
|
|
||||||
|
|
||||||
# --- SMTP (Real mail server) ---
|
|
||||||
SMTP_HOST=smtp.example.com
|
|
||||||
SMTP_PORT=587
|
|
||||||
SMTP_USERNAME=noreply@breakpilot.ai
|
|
||||||
SMTP_PASSWORD=CHANGE_ME_SMTP_PASSWORD
|
|
||||||
SMTP_FROM_NAME=BreakPilot
|
|
||||||
SMTP_FROM_ADDR=noreply@breakpilot.ai
|
|
||||||
|
|
||||||
# --- LLM / Ollama (optional) ---
|
|
||||||
OLLAMA_BASE_URL=
|
|
||||||
OLLAMA_URL=
|
|
||||||
OLLAMA_ENABLED=false
|
|
||||||
OLLAMA_DEFAULT_MODEL=
|
|
||||||
OLLAMA_VISION_MODEL=
|
|
||||||
OLLAMA_CORRECTION_MODEL=
|
|
||||||
OLLAMA_TIMEOUT=120
|
|
||||||
|
|
||||||
# --- Anthropic (optional) ---
|
|
||||||
ANTHROPIC_API_KEY=
|
|
||||||
|
|
||||||
# --- vast.ai GPU (optional) ---
|
|
||||||
VAST_API_KEY=
|
|
||||||
VAST_INSTANCE_ID=
|
|
||||||
|
|
||||||
# --- Game Settings ---
|
|
||||||
GAME_USE_DATABASE=true
|
|
||||||
GAME_REQUIRE_AUTH=true
|
|
||||||
GAME_REQUIRE_BILLING=true
|
|
||||||
GAME_LLM_MODEL=
|
|
||||||
|
|
||||||
# --- Frontend URLs (build args) ---
|
|
||||||
NEXT_PUBLIC_API_URL=https://api-lehrer.breakpilot.ai
|
|
||||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL=https://klausur.breakpilot.ai
|
|
||||||
NEXT_PUBLIC_VOICE_SERVICE_URL=wss://voice.breakpilot.ai
|
|
||||||
NEXT_PUBLIC_BILLING_API_URL=https://api-core.breakpilot.ai
|
|
||||||
NEXT_PUBLIC_APP_URL=https://app.breakpilot.ai
|
|
||||||
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=
|
|
||||||
|
|
||||||
# --- Edu Search ---
|
|
||||||
EDU_SEARCH_URL=
|
|
||||||
EDU_SEARCH_API_KEY=
|
|
||||||
OPENSEARCH_PASSWORD=CHANGE_ME_OPENSEARCH_PASSWORD
|
|
||||||
|
|
||||||
# --- Misc ---
|
|
||||||
CONTROL_API_KEY=
|
|
||||||
ALERTS_AGENT_ENABLED=false
|
|
||||||
PADDLEOCR_SERVICE_URL=
|
|
||||||
TROCR_SERVICE_URL=
|
|
||||||
CAMUNDA_URL=
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
# Triggers a forced rebuild of the breakpilot-lehrer resource through the
# Coolify API on every push to the `coolify` branch.
name: Deploy to Coolify

on:
  push:
    branches:
      - coolify

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Wait for Core deployment
        run: |
          echo "Waiting 30s for Core services to stabilize..."
          sleep 30

      - name: Deploy via Coolify API
        # Secrets are handed to the script as environment variables instead of
        # being expanded into the script text, so a secret containing quotes or
        # shell metacharacters cannot alter the command that is executed.
        env:
          COOLIFY_API_TOKEN: ${{ secrets.COOLIFY_API_TOKEN }}
          COOLIFY_RESOURCE_UUID: ${{ secrets.COOLIFY_RESOURCE_UUID }}
          COOLIFY_BASE_URL: ${{ secrets.COOLIFY_BASE_URL }}
        run: |
          echo "Deploying breakpilot-lehrer to Coolify..."
          # `|| true`: on a transport error curl still prints 000 as the status,
          # which is then reported by the check below instead of aborting the
          # step without any message.
          HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
            --connect-timeout 10 --max-time 120 \
            -X POST \
            -H "Authorization: Bearer ${COOLIFY_API_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"uuid\": \"${COOLIFY_RESOURCE_UUID}\", \"force_rebuild\": true}" \
            "${COOLIFY_BASE_URL}/api/v1/deploy") || true

          echo "HTTP Status: $HTTP_STATUS"
          if [ "$HTTP_STATUS" -ne 200 ] && [ "$HTTP_STATUS" -ne 201 ]; then
            echo "Deployment failed with status $HTTP_STATUS"
            exit 1
          fi
          echo "Deployment triggered successfully!"
|
|
||||||
@@ -34,8 +34,8 @@ WORKDIR /app
|
|||||||
ENV NODE_ENV=production
|
ENV NODE_ENV=production
|
||||||
|
|
||||||
# Create non-root user
|
# Create non-root user
|
||||||
RUN addgroup -S -g 1001 nodejs
|
RUN addgroup --system --gid 1001 nodejs
|
||||||
RUN adduser -S -u 1001 -G nodejs nextjs
|
RUN adduser --system --uid 1001 nextjs
|
||||||
|
|
||||||
# Copy built assets
|
# Copy built assets
|
||||||
COPY --from=builder /app/public ./public
|
COPY --from=builder /app/public ./public
|
||||||
|
|||||||
285
admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
Normal file
285
admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
Normal file
@@ -0,0 +1,285 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useState } from 'react'
|
||||||
|
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||||
|
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
||||||
|
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||||
|
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||||
|
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
||||||
|
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||||
|
import { StepCoordinates } from '@/components/ocr-pipeline/StepCoordinates'
|
||||||
|
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
||||||
|
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
||||||
|
import { PIPELINE_STEPS, type PipelineStep, type SessionListItem } from './types'
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
export default function OcrPipelinePage() {
|
||||||
|
const [currentStep, setCurrentStep] = useState(0)
|
||||||
|
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||||
|
const [sessionName, setSessionName] = useState<string>('')
|
||||||
|
const [sessions, setSessions] = useState<SessionListItem[]>([])
|
||||||
|
const [loadingSessions, setLoadingSessions] = useState(true)
|
||||||
|
const [editingName, setEditingName] = useState<string | null>(null)
|
||||||
|
const [editNameValue, setEditNameValue] = useState('')
|
||||||
|
const [steps, setSteps] = useState<PipelineStep[]>(
|
||||||
|
PIPELINE_STEPS.map((s, i) => ({
|
||||||
|
...s,
|
||||||
|
status: i === 0 ? 'active' : 'pending',
|
||||||
|
})),
|
||||||
|
)
|
||||||
|
|
||||||
|
// Load session list on mount
|
||||||
|
useEffect(() => {
|
||||||
|
loadSessions()
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
// Fetches the session list from the klausur API and stores it in state.
// The loading flag is raised for the duration of the request and always
// cleared afterwards. On a non-OK response or a network error the previous
// list is kept and the failure is logged.
const loadSessions = async () => {
  setLoadingSessions(true)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
    if (!res.ok) {
      // Previously a failed request was dropped without any trace.
      console.error(`Failed to load sessions: HTTP ${res.status}`)
      return
    }
    const data = await res.json()
    // Only accept a real array; the list is rendered with `sessions.map`.
    const list = data && data.sessions
    setSessions(Array.isArray(list) ? list : [])
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoadingSessions(false)
  }
}
|
||||||
|
|
||||||
|
// Opens an existing session: loads its details, remembers its id and display
// name, and moves the stepper to the step stored for that session.
//
// sid: id of the session to open.
// On a non-OK response or a network error the current page state is left
// untouched and the failure is logged.
const openSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
    if (!res.ok) {
      console.error(`Failed to open session ${sid}: HTTP ${res.status}`)
      return
    }
    const data = await res.json()

    setSessionId(sid)
    setSessionName(data.name || data.filename || '')

    // Stored steps are 1-based (1=deskew, 2=dewarp, 3=columns, ...) while the
    // UI steps are 0-based. A missing or non-numeric value falls back to the
    // first step.
    const dbStep = Number(data.current_step) || 1
    // Clamp to the known steps: an out-of-range value would otherwise mark
    // every step as completed and render an empty page.
    const lastUiStep = PIPELINE_STEPS.length - 1
    const uiStep = Math.min(lastUiStep, Math.max(0, Math.floor(dbStep) - 1))

    setSteps(
      PIPELINE_STEPS.map((s, i) => ({
        ...s,
        status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
      })),
    )
    setCurrentStep(uiStep)
  } catch (e) {
    console.error('Failed to open session:', e)
  }
}, [])
|
||||||
|
|
||||||
|
// Deletes a session on the server and, only once the server confirmed it,
// removes it from the list. If the deleted session is the one currently open,
// the page is reset to an empty pipeline.
//
// sid: id of the session to delete.
const deleteSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
    // A 404 means the session is already gone, so the row can be dropped too.
    // Any other failure keeps the row, because the session still exists.
    if (!res.ok && res.status !== 404) {
      console.error(`Failed to delete session ${sid}: HTTP ${res.status}`)
      return
    }

    setSessions((prev) => prev.filter((s) => s.id !== sid))
    if (sessionId === sid) {
      setSessionId(null)
      // Also clear the display name so no stale name survives the reset,
      // matching what handleNewSession does.
      setSessionName('')
      setCurrentStep(0)
      setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    }
  } catch (e) {
    console.error('Failed to delete session:', e)
  }
}, [sessionId])
|
||||||
|
|
||||||
|
// Renames a session on the server and mirrors the new name into the list
// (and into the active-session label when that session is open). Inline
// editing is always closed afterwards, whether or not the rename succeeded.
//
// sid: id of the session to rename.
// newName: name to store for the session.
const renameSession = useCallback(async (sid: string, newName: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: newName }),
    })
    // Do not show a name the server did not accept.
    if (!res.ok) {
      console.error(`Failed to rename session ${sid}: HTTP ${res.status}`)
      return
    }

    setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
    if (sessionId === sid) setSessionName(newName)
  } catch (e) {
    console.error('Failed to rename session:', e)
  } finally {
    setEditingName(null)
  }
}, [sessionId])
|
||||||
|
|
||||||
|
// Stepper click handler. A step can be selected when it is the current step,
// lies before it, or is already marked as completed; other clicks are ignored.
const handleStepClick = (index: number) => {
  const isSelectable = index <= currentStep || steps[index].status === 'completed'
  if (!isSelectable) return
  setCurrentStep(index)
}
|
||||||
|
|
||||||
|
// Advances the pipeline by one step: the current step becomes 'completed',
// the following one 'active'. Does nothing when already on the last step.
const handleNext = () => {
  const onLastStep = currentStep >= steps.length - 1
  if (onLastStep) return

  const following = currentStep + 1
  setSteps((previous) =>
    previous.map((step, position) =>
      position === currentStep
        ? { ...step, status: 'completed' }
        : position === following
          ? { ...step, status: 'active' }
          : step,
    ),
  )
  setCurrentStep((previous) => previous + 1)
}
|
||||||
|
|
||||||
|
// Completion callback of the deskew step. Stores the session id it reports,
// refreshes the session list so the session shows up there, and moves on to
// the next step.
const handleDeskewComplete = (sid: string) => {
  setSessionId(sid)
  // Fire-and-forget: the list refresh runs in the background.
  void loadSessions()
  handleNext()
}
|
||||||
|
|
||||||
|
// Resets the page to an empty pipeline: no session selected, no session name,
// first step active and all remaining steps pending.
const handleNewSession = () => {
  const initialSteps = PIPELINE_STEPS.map(
    (step, position): PipelineStep => ({
      ...step,
      status: position === 0 ? 'active' : 'pending',
    }),
  )

  setSessionId(null)
  setSessionName('')
  setCurrentStep(0)
  setSteps(initialSteps)
}
|
||||||
|
|
||||||
|
// Display label per stored step number. Stored steps are 1-based, so the
// label of PIPELINE_STEPS[i] is registered under i + 1. Deriving the labels
// from PIPELINE_STEPS keeps the session list and the stepper in sync instead
// of maintaining a second hard-coded copy of the names.
const stepNames: Record<number, string> = PIPELINE_STEPS.reduce<Record<number, string>>(
  (labels, step, position) => {
    labels[position + 1] = step.name
    return labels
  },
  {},
)
|
||||||
|
|
||||||
|
// Returns the component for the currently selected step, or null when the
// index does not match any known step. Only the first three steps receive the
// session id and a completion callback.
const renderStep = () => {
  if (currentStep === 0) {
    return <StepDeskew sessionId={sessionId} onNext={handleDeskewComplete} />
  }
  if (currentStep === 1) {
    return <StepDewarp sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 2) {
    return <StepColumnDetection sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 3) return <StepWordRecognition />
  if (currentStep === 4) return <StepCoordinates />
  if (currentStep === 5) return <StepReconstruction />
  if (currentStep === 6) return <StepGroundTruth />
  return null
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="space-y-6">
|
||||||
|
<PagePurpose
|
||||||
|
title="OCR Pipeline"
|
||||||
|
purpose="Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. Ziel: 10 Vokabelseiten fehlerfrei rekonstruieren."
|
||||||
|
audience={['Entwickler', 'Data Scientists']}
|
||||||
|
architecture={{
|
||||||
|
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
|
||||||
|
databases: ['PostgreSQL Sessions'],
|
||||||
|
}}
|
||||||
|
relatedPages={[
|
||||||
|
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
|
||||||
|
{ name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Trainingsdaten' },
|
||||||
|
]}
|
||||||
|
defaultCollapsed
|
||||||
|
/>
|
||||||
|
|
||||||
|
{/* Session List */}
|
||||||
|
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
|
||||||
|
<div className="flex items-center justify-between mb-3">
|
||||||
|
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||||
|
Sessions
|
||||||
|
</h3>
|
||||||
|
<button
|
||||||
|
onClick={handleNewSession}
|
||||||
|
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
|
||||||
|
>
|
||||||
|
+ Neue Session
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{loadingSessions ? (
|
||||||
|
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
|
||||||
|
) : sessions.length === 0 ? (
|
||||||
|
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
|
||||||
|
) : (
|
||||||
|
<div className="space-y-1 max-h-48 overflow-y-auto">
|
||||||
|
{sessions.map((s) => (
|
||||||
|
<div
|
||||||
|
key={s.id}
|
||||||
|
className={`flex items-center gap-2 px-3 py-2 rounded-lg text-sm transition-colors cursor-pointer ${
|
||||||
|
sessionId === s.id
|
||||||
|
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||||
|
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
|
||||||
|
{editingName === s.id ? (
|
||||||
|
<input
|
||||||
|
autoFocus
|
||||||
|
value={editNameValue}
|
||||||
|
onChange={(e) => setEditNameValue(e.target.value)}
|
||||||
|
onBlur={() => renameSession(s.id, editNameValue)}
|
||||||
|
onKeyDown={(e) => {
|
||||||
|
if (e.key === 'Enter') renameSession(s.id, editNameValue)
|
||||||
|
if (e.key === 'Escape') setEditingName(null)
|
||||||
|
}}
|
||||||
|
onClick={(e) => e.stopPropagation()}
|
||||||
|
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
|
||||||
|
{s.name || s.filename}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<div className="text-xs text-gray-400 flex gap-2">
|
||||||
|
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
|
||||||
|
<span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={(e) => {
|
||||||
|
e.stopPropagation()
|
||||||
|
setEditNameValue(s.name || s.filename)
|
||||||
|
setEditingName(s.id)
|
||||||
|
}}
|
||||||
|
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
|
||||||
|
title="Umbenennen"
|
||||||
|
>
|
||||||
|
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={(e) => {
|
||||||
|
e.stopPropagation()
|
||||||
|
if (confirm('Session loeschen?')) deleteSession(s.id)
|
||||||
|
}}
|
||||||
|
className="p-1 text-gray-400 hover:text-red-500"
|
||||||
|
title="Loeschen"
|
||||||
|
>
|
||||||
|
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Active session name */}
|
||||||
|
{sessionId && sessionName && (
|
||||||
|
<div className="text-sm text-gray-500 dark:text-gray-400">
|
||||||
|
Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<PipelineStepper steps={steps} currentStep={currentStep} onStepClick={handleStepClick} />
|
||||||
|
|
||||||
|
<div className="min-h-[400px]">{renderStep()}</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
102
admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
Normal file
102
admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
/** State of a single pipeline step as shown in the stepper. */
export type PipelineStepStatus =
  | 'pending'
  | 'active'
  | 'completed'
  | 'failed'

/** One entry of the pipeline stepper. */
export interface PipelineStep {
  id: string
  name: string
  icon: string
  status: PipelineStepStatus
}

/** One row of the session list. */
export interface SessionListItem {
  id: string
  /** Display name; the list falls back to `filename` when it is empty. */
  name: string
  filename: string
  status: string
  /** 1-based step number (1 = deskew, 2 = dewarp, 3 = columns, ...). */
  current_step: number
  /** Timestamp string; the list parses it with `new Date(...)`. */
  created_at: string
  updated_at?: string
}

/** Details of a single session, including any step results present. */
export interface SessionInfo {
  session_id: string
  filename: string
  name?: string
  image_width: number
  image_height: number
  original_image_url: string
  current_step?: number
  deskew_result?: DeskewResult
  dewarp_result?: DewarpResult
  column_result?: ColumnResult
}

/** Result payload of the deskew step. */
export interface DeskewResult {
  session_id: string
  angle_hough: number
  angle_word_alignment: number
  angle_applied: number
  method_used:
    | 'hough'
    | 'word_alignment'
    | 'manual'
  confidence: number
  duration_seconds: number
  deskewed_image_url: string
  binarized_image_url: string
}

/** Reviewer verdict on a deskew result. */
export interface DeskewGroundTruth {
  is_correct: boolean
  corrected_angle?: number
  notes?: string
}

/** Result payload of the dewarp step. */
export interface DewarpResult {
  session_id: string
  method_used:
    | 'vertical_edge'
    | 'manual'
    | 'none'
  shear_degrees: number
  confidence: number
  duration_seconds: number
  dewarped_image_url: string
}

/** Reviewer verdict on a dewarp result. */
export interface DewarpGroundTruth {
  is_correct: boolean
  corrected_shear?: number
  notes?: string
}

/** A classified rectangular region of the page. */
export interface PageRegion {
  type:
    | 'column_en'
    | 'column_de'
    | 'column_example'
    | 'page_ref'
    | 'column_marker'
    | 'column_text'
    | 'column_ignore'
    | 'header'
    | 'footer'
  x: number
  y: number
  width: number
  height: number
  classification_confidence?: number
  classification_method?: string
}

/** Result payload of the column detection step. */
export interface ColumnResult {
  columns: PageRegion[]
  duration_seconds: number
}

/** Reviewer verdict on a column detection result. */
export interface ColumnGroundTruth {
  is_correct: boolean
  corrected_columns?: PageRegion[]
  notes?: string
}

/** A manually placed column divider. */
export interface ManualColumnDivider {
  xPercent: number // Position in % of image width (0-100)
}

/** Every region type a `PageRegion` can carry. */
export type ColumnTypeKey = PageRegion['type']

/**
 * The pipeline steps in display order, every one starting as 'pending'.
 * Written as [id, name, icon] rows to keep the table compact.
 */
export const PIPELINE_STEPS: PipelineStep[] = (
  [
    ['deskew', 'Begradigung', '📐'],
    ['dewarp', 'Entzerrung', '🔧'],
    ['columns', 'Spalten', '📊'],
    ['words', 'Woerter', '🔤'],
    ['coordinates', 'Koordinaten', '📍'],
    ['reconstruction', 'Rekonstruktion', '🏗️'],
    ['ground-truth', 'Validierung', '✅'],
  ] as Array<[string, string, string]>
).map(([id, name, icon]): PipelineStep => ({ id, name, icon, status: 'pending' }))
|
||||||
@@ -1011,6 +1011,53 @@ const REGULATIONS = [
|
|||||||
keyTopics: ['Bussgeldberechnung', 'Schweregrad', 'Milderungsgruende', 'Bussgeldrahmen'],
|
keyTopics: ['Bussgeldberechnung', 'Schweregrad', 'Milderungsgruende', 'Bussgeldrahmen'],
|
||||||
effectiveDate: '2022'
|
effectiveDate: '2022'
|
||||||
},
|
},
|
||||||
|
// =====================================================================
|
||||||
|
// Neu ingestierte EU-Richtlinien (Februar 2026)
|
||||||
|
// =====================================================================
|
||||||
|
{
|
||||||
|
code: 'E_COMMERCE_RL',
|
||||||
|
name: 'E-Commerce-Richtlinie',
|
||||||
|
fullName: 'Richtlinie 2000/31/EG ueber den elektronischen Geschaeftsverkehr',
|
||||||
|
type: 'eu_directive',
|
||||||
|
expected: 30,
|
||||||
|
description: 'EU-Richtlinie ueber den elektronischen Geschaeftsverkehr (E-Commerce). Regelt Herkunftslandprinzip, Informationspflichten, Haftungsprivilegien fuer Vermittler (Mere Conduit, Caching, Hosting).',
|
||||||
|
relevantFor: ['Online-Dienste', 'E-Commerce', 'Hosting-Anbieter', 'Plattformen'],
|
||||||
|
keyTopics: ['Herkunftslandprinzip', 'Haftungsprivileg', 'Informationspflichten', 'Spam-Verbot', 'Vermittlerhaftung'],
|
||||||
|
effectiveDate: '17. Juli 2000'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
code: 'VERBRAUCHERRECHTE_RL',
|
||||||
|
name: 'Verbraucherrechte-Richtlinie',
|
||||||
|
fullName: 'Richtlinie 2011/83/EU ueber die Rechte der Verbraucher',
|
||||||
|
type: 'eu_directive',
|
||||||
|
expected: 25,
|
||||||
|
description: 'EU-weite Harmonisierung der Verbraucherrechte bei Fernabsatz und aussergeschaeftlichen Vertraegen. 14-Tage-Widerrufsrecht, Informationspflichten, digitale Inhalte.',
|
||||||
|
relevantFor: ['Online-Shops', 'E-Commerce', 'Fernabsatz', 'Dienstleister'],
|
||||||
|
keyTopics: ['Widerrufsrecht 14 Tage', 'Informationspflichten', 'Fernabsatzvertraege', 'Digitale Inhalte'],
|
||||||
|
effectiveDate: '13. Juni 2014'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
code: 'DIGITALE_INHALTE_RL',
|
||||||
|
name: 'Digitale-Inhalte-Richtlinie',
|
||||||
|
fullName: 'Richtlinie (EU) 2019/770 ueber digitale Inhalte und Dienstleistungen',
|
||||||
|
type: 'eu_directive',
|
||||||
|
expected: 20,
|
||||||
|
description: 'Gewaehrleistungsrecht fuer digitale Inhalte und Dienstleistungen. Regelt Maengelhaftung, Updates, Vertragsmaessigkeit und Kuendigungsrechte bei digitalen Produkten.',
|
||||||
|
relevantFor: ['SaaS-Anbieter', 'App-Entwickler', 'Cloud-Dienste', 'Streaming-Anbieter', 'Software-Hersteller'],
|
||||||
|
keyTopics: ['Digitale Gewaehrleistung', 'Update-Pflicht', 'Vertragsmaessigkeit', 'Kuendigungsrecht', 'Datenportabilitaet'],
|
||||||
|
effectiveDate: '1. Januar 2022'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
code: 'DMA',
|
||||||
|
name: 'Digital Markets Act',
|
||||||
|
fullName: 'Verordnung (EU) 2022/1925 - Digital Markets Act',
|
||||||
|
type: 'eu_regulation',
|
||||||
|
expected: 50,
|
||||||
|
description: 'Reguliert digitale Gatekeeper-Plattformen. Stellt Verhaltensregeln fuer grosse Plattformen auf (Apple, Google, Meta, Amazon, Microsoft). Verbietet Selbstbevorzugung und erzwingt Interoperabilitaet.',
|
||||||
|
relevantFor: ['Grosse Plattformen', 'App-Stores', 'Suchmaschinen', 'Social Media', 'Messenger-Dienste'],
|
||||||
|
keyTopics: ['Gatekeeper-Pflichten', 'Interoperabilitaet', 'Selbstbevorzugung', 'App-Store-Regeln', 'Datenportabilitaet'],
|
||||||
|
effectiveDate: '2. Mai 2023'
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
// License info for each regulation
|
// License info for each regulation
|
||||||
@@ -1099,6 +1146,127 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
|
|||||||
LU_DPA_LAW: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Luxemburg — frei verwendbar' },
|
LU_DPA_LAW: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Luxemburg — frei verwendbar' },
|
||||||
DK_DATABESKYTTELSESLOVEN: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Daenemark — frei verwendbar' },
|
DK_DATABESKYTTELSESLOVEN: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Daenemark — frei verwendbar' },
|
||||||
EDPB_GUIDELINES_1_2022: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
EDPB_GUIDELINES_1_2022: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||||
|
// Neue EU-Richtlinien (Februar 2026 ingestiert)
|
||||||
|
E_COMMERCE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||||
|
VERBRAUCHERRECHTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||||
|
DIGITALE_INHALTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||||
|
DMA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regulations that are currently ingested in RAG (Qdrant collections)
|
||||||
|
// Updated: 2026-02-27 — Aktualisieren wenn neue Dokumente ingestiert werden!
|
||||||
|
const REGULATIONS_IN_RAG: Record<string, { collection: string; chunks: number }> = {
|
||||||
|
// EU Verordnungen/Richtlinien (bp_compliance_ce: 7.341 total)
|
||||||
|
GDPR: { collection: 'bp_compliance_ce', chunks: 1842 },
|
||||||
|
EPRIVACY: { collection: 'bp_compliance_ce', chunks: 156 },
|
||||||
|
SCC: { collection: 'bp_compliance_ce', chunks: 89 },
|
||||||
|
SCC_FULL_TEXT: { collection: 'bp_compliance_ce', chunks: 154 },
|
||||||
|
AIACT: { collection: 'bp_compliance_ce', chunks: 1245 },
|
||||||
|
CRA: { collection: 'bp_compliance_ce', chunks: 687 },
|
||||||
|
NIS2: { collection: 'bp_compliance_ce', chunks: 534 },
|
||||||
|
DGA: { collection: 'bp_compliance_ce', chunks: 312 },
|
||||||
|
DSA: { collection: 'bp_compliance_ce', chunks: 978 },
|
||||||
|
PLD: { collection: 'bp_compliance_ce', chunks: 124 },
|
||||||
|
E_COMMERCE_RL: { collection: 'bp_compliance_ce', chunks: 198 },
|
||||||
|
VERBRAUCHERRECHTE_RL: { collection: 'bp_compliance_ce', chunks: 245 },
|
||||||
|
DIGITALE_INHALTE_RL: { collection: 'bp_compliance_ce', chunks: 187 },
|
||||||
|
DMA: { collection: 'bp_compliance_ce', chunks: 590 },
|
||||||
|
// DE Gesetze (bp_compliance_gesetze: 33.929 total)
|
||||||
|
TDDDG: { collection: 'bp_compliance_gesetze', chunks: 215 },
|
||||||
|
BDSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 487 },
|
||||||
|
DE_DDG: { collection: 'bp_compliance_gesetze', chunks: 198 },
|
||||||
|
DE_BGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 4250 },
|
||||||
|
DE_EGBGB: { collection: 'bp_compliance_gesetze', chunks: 312 },
|
||||||
|
DE_HGB_RET: { collection: 'bp_compliance_gesetze', chunks: 6840 },
|
||||||
|
DE_AO_RET: { collection: 'bp_compliance_gesetze', chunks: 5620 },
|
||||||
|
// BSI Standards (bp_compliance_gesetze)
|
||||||
|
'BSI-TR-03161-1': { collection: 'bp_compliance_gesetze', chunks: 425 },
|
||||||
|
'BSI-TR-03161-2': { collection: 'bp_compliance_gesetze', chunks: 380 },
|
||||||
|
'BSI-TR-03161-3': { collection: 'bp_compliance_gesetze', chunks: 345 },
|
||||||
|
// Nationale Datenschutzgesetze (bp_compliance_gesetze)
|
||||||
|
AT_DSG: { collection: 'bp_compliance_gesetze', chunks: 287 },
|
||||||
|
CH_DSG: { collection: 'bp_compliance_gesetze', chunks: 156 },
|
||||||
|
ES_LOPDGDD: { collection: 'bp_compliance_gesetze', chunks: 1245 },
|
||||||
|
IT_CODICE_PRIVACY: { collection: 'bp_compliance_gesetze', chunks: 198 },
|
||||||
|
NL_UAVG: { collection: 'bp_compliance_gesetze', chunks: 1320 },
|
||||||
|
FR_CNIL_GUIDE: { collection: 'bp_compliance_gesetze', chunks: 1450 },
|
||||||
|
IE_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 534 },
|
||||||
|
UK_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 1680 },
|
||||||
|
UK_GDPR: { collection: 'bp_compliance_gesetze', chunks: 890 },
|
||||||
|
NO_PERSONOPPLYSNINGSLOVEN: { collection: 'bp_compliance_gesetze', chunks: 245 },
|
||||||
|
SE_DATASKYDDSLAG: { collection: 'bp_compliance_gesetze', chunks: 167 },
|
||||||
|
PL_UODO: { collection: 'bp_compliance_gesetze', chunks: 198 },
|
||||||
|
CZ_ZOU: { collection: 'bp_compliance_gesetze', chunks: 1120 },
|
||||||
|
HU_INFOTV: { collection: 'bp_compliance_gesetze', chunks: 1345 },
|
||||||
|
// EDPB Guidelines (bp_compliance_datenschutz)
|
||||||
|
EDPB_GUIDELINES_5_2020: { collection: 'bp_compliance_datenschutz', chunks: 245 },
|
||||||
|
EDPB_GUIDELINES_7_2020: { collection: 'bp_compliance_datenschutz', chunks: 347 },
|
||||||
|
// === Neue Regulierungen (2026-02-28) ===
|
||||||
|
// EU CE-Regulierungen (bp_compliance_ce)
|
||||||
|
DPF: { collection: 'bp_compliance_ce', chunks: 1232 },
|
||||||
|
EUCSA: { collection: 'bp_compliance_ce', chunks: 558 },
|
||||||
|
DATAACT: { collection: 'bp_compliance_ce', chunks: 809 },
|
||||||
|
DORA: { collection: 'bp_compliance_ce', chunks: 823 },
|
||||||
|
PSD2: { collection: 'bp_compliance_ce', chunks: 796 },
|
||||||
|
AMLR: { collection: 'bp_compliance_ce', chunks: 1182 },
|
||||||
|
MiCA: { collection: 'bp_compliance_ce', chunks: 1640 },
|
||||||
|
EHDS: { collection: 'bp_compliance_ce', chunks: 1212 },
|
||||||
|
EAA: { collection: 'bp_compliance_ce', chunks: 433 },
|
||||||
|
DSM: { collection: 'bp_compliance_ce', chunks: 416 },
|
||||||
|
GPSR: { collection: 'bp_compliance_ce', chunks: 509 },
|
||||||
|
// DE Gesetze (bp_compliance_gesetze)
|
||||||
|
DE_UWG: { collection: 'bp_compliance_gesetze', chunks: 1 },
|
||||||
|
DE_TKG: { collection: 'bp_compliance_gesetze', chunks: 1631 },
|
||||||
|
DE_PANGV: { collection: 'bp_compliance_gesetze', chunks: 1 },
|
||||||
|
DE_DLINFOV: { collection: 'bp_compliance_gesetze', chunks: 21 },
|
||||||
|
DE_BETRVG: { collection: 'bp_compliance_gesetze', chunks: 498 },
|
||||||
|
DE_GESCHGEHG: { collection: 'bp_compliance_gesetze', chunks: 63 },
|
||||||
|
DE_BSIG: { collection: 'bp_compliance_gesetze', chunks: 1 },
|
||||||
|
DE_USTG_RET: { collection: 'bp_compliance_gesetze', chunks: 1071 },
|
||||||
|
// AT/LI Gesetze (bp_compliance_gesetze)
|
||||||
|
AT_DSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 6 },
|
||||||
|
LI_DSG: { collection: 'bp_compliance_gesetze', chunks: 2 },
|
||||||
|
AT_ECG: { collection: 'bp_compliance_gesetze', chunks: 120 },
|
||||||
|
AT_TKG: { collection: 'bp_compliance_gesetze', chunks: 2174 },
|
||||||
|
AT_KSCHG: { collection: 'bp_compliance_gesetze', chunks: 402 },
|
||||||
|
AT_FAGG: { collection: 'bp_compliance_gesetze', chunks: 2 },
|
||||||
|
AT_UGB_RET: { collection: 'bp_compliance_gesetze', chunks: 2828 },
|
||||||
|
AT_BAO_RET: { collection: 'bp_compliance_gesetze', chunks: 2246 },
|
||||||
|
AT_MEDIENG: { collection: 'bp_compliance_gesetze', chunks: 571 },
|
||||||
|
AT_ABGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 2521 },
|
||||||
|
AT_UWG: { collection: 'bp_compliance_gesetze', chunks: 403 },
|
||||||
|
// CH Gesetze (bp_compliance_gesetze)
|
||||||
|
CH_DSV: { collection: 'bp_compliance_gesetze', chunks: 5 },
|
||||||
|
CH_OR_AGB: { collection: 'bp_compliance_gesetze', chunks: 5 },
|
||||||
|
CH_UWG: { collection: 'bp_compliance_gesetze', chunks: 5 },
|
||||||
|
CH_FMG: { collection: 'bp_compliance_gesetze', chunks: 5 },
|
||||||
|
CH_GEBUV: { collection: 'bp_compliance_gesetze', chunks: 5 },
|
||||||
|
CH_ZERTES: { collection: 'bp_compliance_gesetze', chunks: 5 },
|
||||||
|
CH_ZGB_PERS: { collection: 'bp_compliance_gesetze', chunks: 5 },
|
||||||
|
// Weitere EU-Laender (bp_compliance_gesetze)
|
||||||
|
BE_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 3 },
|
||||||
|
FI_TIETOSUOJALAKI: { collection: 'bp_compliance_gesetze', chunks: 2 },
|
||||||
|
DK_DATABESKYTTELSESLOVEN: { collection: 'bp_compliance_gesetze', chunks: 2 },
|
||||||
|
LU_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 2 },
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper: Check if regulation is in RAG
|
||||||
|
/**
 * Report whether a regulation code has its own entry in REGULATIONS_IN_RAG.
 *
 * An own-property check is used instead of the `in` operator: `in` also
 * matches keys inherited from Object.prototype (e.g. "constructor",
 * "toString"), which would wrongly mark such strings as ingested regulations.
 *
 * @param code Regulation code to look up (e.g. "DORA").
 * @returns true only if REGULATIONS_IN_RAG declares an entry for `code`.
 */
const isInRag = (code: string): boolean =>
  Object.prototype.hasOwnProperty.call(REGULATIONS_IN_RAG, code)
|
||||||
|
|
||||||
|
// Helper: Get known chunk count for a regulation
|
||||||
|
/**
 * Look up the recorded chunk count for a regulation code.
 *
 * @param code Regulation code used as key into REGULATIONS_IN_RAG.
 * @returns The entry's `chunks` value, or 0 when there is no entry or the
 *          value is falsy.
 */
const getKnownChunks = (code: string): number => {
  const entry = REGULATIONS_IN_RAG[code]
  return entry?.chunks || 0
}
|
||||||
|
|
||||||
|
// Known collection totals (updated: 2026-02-28)
|
||||||
|
const COLLECTION_TOTALS = {
|
||||||
|
bp_compliance_gesetze: 58304,
|
||||||
|
bp_compliance_ce: 18183,
|
||||||
|
bp_legal_templates: 7689,
|
||||||
|
bp_compliance_datenschutz: 2448,
|
||||||
|
bp_dsfa_corpus: 7867,
|
||||||
|
bp_compliance_recht: 1425,
|
||||||
|
bp_nibis_eh: 7996,
|
||||||
|
total_legal: 76487, // gesetze + ce
|
||||||
|
total_all: 103912,
|
||||||
}
|
}
|
||||||
|
|
||||||
// License display labels
|
// License display labels
|
||||||
@@ -1804,7 +1972,7 @@ export default function RAGPage() {
|
|||||||
{/* Page Purpose */}
|
{/* Page Purpose */}
|
||||||
<PagePurpose
|
<PagePurpose
|
||||||
title="Daten & RAG"
|
title="Daten & RAG"
|
||||||
purpose="Verwalten und durchsuchen Sie 4 RAG-Collections: Legal Corpus (24 Regulierungen), DSFA Corpus (70+ Quellen inkl. internationaler Datenschutzgesetze), NiBiS EH (Bildungsinhalte) und Legal Templates (Dokumentvorlagen). Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur."
|
purpose={`Verwalten und durchsuchen Sie 7 RAG-Collections mit ${REGULATIONS.length} Regulierungen (${Object.keys(REGULATIONS_IN_RAG).length} im RAG). Legal Corpus, DSFA Corpus (70+ Quellen), NiBiS EH (Bildungsinhalte) und Legal Templates. Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur.`}
|
||||||
audience={['DSB', 'Compliance Officer', 'Entwickler']}
|
audience={['DSB', 'Compliance Officer', 'Entwickler']}
|
||||||
gdprArticles={['§5 UrhG (Amtliche Werke)', 'Art. 5 DSGVO (Rechenschaftspflicht)']}
|
gdprArticles={['§5 UrhG (Amtliche Werke)', 'Art. 5 DSGVO (Rechenschaftspflicht)']}
|
||||||
architecture={{
|
architecture={{
|
||||||
@@ -1826,8 +1994,8 @@ export default function RAGPage() {
|
|||||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
|
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
|
||||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||||
<p className="text-xs font-medium text-blue-600 uppercase mb-1">Legal Corpus</p>
|
<p className="text-xs font-medium text-blue-600 uppercase mb-1">Legal Corpus</p>
|
||||||
<p className="text-2xl font-bold text-slate-900">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
<p className="text-2xl font-bold text-slate-900">{COLLECTION_TOTALS.total_legal.toLocaleString()}</p>
|
||||||
<p className="text-xs text-slate-500">Chunks · {REGULATIONS.length} Regulierungen</p>
|
<p className="text-xs text-slate-500">Chunks · {Object.keys(REGULATIONS_IN_RAG).length}/{REGULATIONS.length} im RAG</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||||
<p className="text-xs font-medium text-purple-600 uppercase mb-1">DSFA Corpus</p>
|
<p className="text-xs font-medium text-purple-600 uppercase mb-1">DSFA Corpus</p>
|
||||||
@@ -1836,12 +2004,12 @@ export default function RAGPage() {
|
|||||||
</div>
|
</div>
|
||||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||||
<p className="text-xs font-medium text-emerald-600 uppercase mb-1">NiBiS EH</p>
|
<p className="text-xs font-medium text-emerald-600 uppercase mb-1">NiBiS EH</p>
|
||||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
<p className="text-2xl font-bold text-slate-900">7.996</p>
|
||||||
<p className="text-xs text-slate-500">Chunks · Bildungs-Erwartungshorizonte</p>
|
<p className="text-xs text-slate-500">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||||
<p className="text-xs font-medium text-orange-600 uppercase mb-1">Legal Templates</p>
|
<p className="text-xs font-medium text-orange-600 uppercase mb-1">Legal Templates</p>
|
||||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
<p className="text-2xl font-bold text-slate-900">7.689</p>
|
||||||
<p className="text-xs text-slate-500">Chunks · Dokumentvorlagen</p>
|
<p className="text-xs text-slate-500">Chunks · Dokumentvorlagen</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -1876,8 +2044,8 @@ export default function RAGPage() {
|
|||||||
className="p-4 rounded-lg border border-blue-200 bg-blue-50 hover:bg-blue-100 transition-colors text-left"
|
className="p-4 rounded-lg border border-blue-200 bg-blue-50 hover:bg-blue-100 transition-colors text-left"
|
||||||
>
|
>
|
||||||
<p className="text-xs font-medium text-blue-600 uppercase">Gesetze & Regulierungen</p>
|
<p className="text-xs font-medium text-blue-600 uppercase">Gesetze & Regulierungen</p>
|
||||||
<p className="text-2xl font-bold text-slate-900 mt-1">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
<p className="text-2xl font-bold text-slate-900 mt-1">{COLLECTION_TOTALS.total_legal.toLocaleString()}</p>
|
||||||
<p className="text-xs text-slate-500 mt-1">{REGULATIONS.length} Regulierungen (EU, DE, BSI)</p>
|
<p className="text-xs text-slate-500 mt-1">{Object.keys(REGULATIONS_IN_RAG).length}/{REGULATIONS.length} im RAG</p>
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button
|
||||||
onClick={() => { setRegulationCategory('dsfa'); setActiveTab('regulations') }}
|
onClick={() => { setRegulationCategory('dsfa'); setActiveTab('regulations') }}
|
||||||
@@ -1889,12 +2057,12 @@ export default function RAGPage() {
|
|||||||
</button>
|
</button>
|
||||||
<div className="p-4 rounded-lg border border-emerald-200 bg-emerald-50 text-left">
|
<div className="p-4 rounded-lg border border-emerald-200 bg-emerald-50 text-left">
|
||||||
<p className="text-xs font-medium text-emerald-600 uppercase">NiBiS EH</p>
|
<p className="text-xs font-medium text-emerald-600 uppercase">NiBiS EH</p>
|
||||||
<p className="text-2xl font-bold text-slate-900 mt-1">28.662</p>
|
<p className="text-2xl font-bold text-slate-900 mt-1">7.996</p>
|
||||||
<p className="text-xs text-slate-500 mt-1">Chunks · Bildungs-Erwartungshorizonte</p>
|
<p className="text-xs text-slate-500 mt-1">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="p-4 rounded-lg border border-orange-200 bg-orange-50 text-left">
|
<div className="p-4 rounded-lg border border-orange-200 bg-orange-50 text-left">
|
||||||
<p className="text-xs font-medium text-orange-600 uppercase">Legal Templates</p>
|
<p className="text-xs font-medium text-orange-600 uppercase">Legal Templates</p>
|
||||||
<p className="text-2xl font-bold text-slate-900 mt-1">824</p>
|
<p className="text-2xl font-bold text-slate-900 mt-1">7.689</p>
|
||||||
<p className="text-xs text-slate-500 mt-1">Chunks · Dokumentvorlagen (VVT, TOM, DSFA)</p>
|
<p className="text-xs text-slate-500 mt-1">Chunks · Dokumentvorlagen (VVT, TOM, DSFA)</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -1904,12 +2072,13 @@ export default function RAGPage() {
|
|||||||
<div className="grid grid-cols-1 md:grid-cols-4 gap-4">
|
<div className="grid grid-cols-1 md:grid-cols-4 gap-4">
|
||||||
{Object.entries(TYPE_LABELS).map(([type, label]) => {
|
{Object.entries(TYPE_LABELS).map(([type, label]) => {
|
||||||
const regs = REGULATIONS.filter((r) => r.type === type)
|
const regs = REGULATIONS.filter((r) => r.type === type)
|
||||||
const totalChunks = regs.reduce((sum, r) => sum + getRegulationChunks(r.code), 0)
|
const inRagCount = regs.filter((r) => isInRag(r.code)).length
|
||||||
|
const totalChunks = regs.reduce((sum, r) => sum + getKnownChunks(r.code), 0)
|
||||||
return (
|
return (
|
||||||
<div key={type} className="bg-white rounded-xl p-4 border border-slate-200">
|
<div key={type} className="bg-white rounded-xl p-4 border border-slate-200">
|
||||||
<div className="flex items-center gap-2 mb-2">
|
<div className="flex items-center gap-2 mb-2">
|
||||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[type]}`}>{label}</span>
|
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[type]}`}>{label}</span>
|
||||||
<span className="text-slate-500 text-sm">{regs.length} Dok.</span>
|
<span className="text-slate-500 text-sm">{inRagCount}/{regs.length} im RAG</span>
|
||||||
</div>
|
</div>
|
||||||
<p className="text-xl font-bold text-slate-900">{totalChunks.toLocaleString()} Chunks</p>
|
<p className="text-xl font-bold text-slate-900">{totalChunks.toLocaleString()} Chunks</p>
|
||||||
</div>
|
</div>
|
||||||
@@ -1923,20 +2092,25 @@ export default function RAGPage() {
|
|||||||
<h3 className="font-semibold text-slate-900">Top Regulierungen (nach Chunks)</h3>
|
<h3 className="font-semibold text-slate-900">Top Regulierungen (nach Chunks)</h3>
|
||||||
</div>
|
</div>
|
||||||
<div className="divide-y">
|
<div className="divide-y">
|
||||||
{REGULATIONS.sort((a, b) => getRegulationChunks(b.code) - getRegulationChunks(a.code))
|
{[...REGULATIONS].sort((a, b) => getKnownChunks(b.code) - getKnownChunks(a.code))
|
||||||
.slice(0, 5)
|
.slice(0, 10)
|
||||||
.map((reg) => {
|
.map((reg) => {
|
||||||
const chunks = getRegulationChunks(reg.code)
|
const chunks = getKnownChunks(reg.code)
|
||||||
return (
|
return (
|
||||||
<div key={reg.code} className="px-4 py-3 flex items-center justify-between">
|
<div key={reg.code} className="px-4 py-3 flex items-center justify-between">
|
||||||
<div className="flex items-center gap-3">
|
<div className="flex items-center gap-3">
|
||||||
|
{isInRag(reg.code) ? (
|
||||||
|
<span className="text-green-500 text-sm">✓</span>
|
||||||
|
) : (
|
||||||
|
<span className="text-red-400 text-sm">✗</span>
|
||||||
|
)}
|
||||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
||||||
{TYPE_LABELS[reg.type]}
|
{TYPE_LABELS[reg.type]}
|
||||||
</span>
|
</span>
|
||||||
<span className="font-medium text-slate-900">{reg.name}</span>
|
<span className="font-medium text-slate-900">{reg.name}</span>
|
||||||
<span className="text-slate-500 text-sm">({reg.code})</span>
|
<span className="text-slate-500 text-sm">({reg.code})</span>
|
||||||
</div>
|
</div>
|
||||||
<span className="font-bold text-teal-600">{chunks.toLocaleString()} Chunks</span>
|
<span className={`font-bold ${chunks > 0 ? 'text-teal-600' : 'text-slate-300'}`}>{chunks > 0 ? chunks.toLocaleString() + ' Chunks' : '—'}</span>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
})}
|
})}
|
||||||
@@ -1995,7 +2169,13 @@ export default function RAGPage() {
|
|||||||
{regulationCategory === 'regulations' && (
|
{regulationCategory === 'regulations' && (
|
||||||
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
||||||
<div className="px-4 py-3 border-b bg-slate-50 flex items-center justify-between">
|
<div className="px-4 py-3 border-b bg-slate-50 flex items-center justify-between">
|
||||||
<h3 className="font-semibold text-slate-900">Alle {REGULATIONS.length} Regulierungen</h3>
|
<h3 className="font-semibold text-slate-900">
|
||||||
|
Alle {REGULATIONS.length} Regulierungen
|
||||||
|
<span className="ml-2 text-sm font-normal text-slate-500">
|
||||||
|
({REGULATIONS.filter(r => isInRag(r.code)).length} im RAG,{' '}
|
||||||
|
{REGULATIONS.filter(r => !isInRag(r.code)).length} ausstehend)
|
||||||
|
</span>
|
||||||
|
</h3>
|
||||||
<button
|
<button
|
||||||
onClick={fetchStatus}
|
onClick={fetchStatus}
|
||||||
className="text-sm text-teal-600 hover:text-teal-700"
|
className="text-sm text-teal-600 hover:text-teal-700"
|
||||||
@@ -2007,6 +2187,7 @@ export default function RAGPage() {
|
|||||||
<table className="w-full">
|
<table className="w-full">
|
||||||
<thead className="bg-slate-50 border-b">
|
<thead className="bg-slate-50 border-b">
|
||||||
<tr>
|
<tr>
|
||||||
|
<th className="px-4 py-3 text-center text-xs font-medium text-slate-500 uppercase w-12">RAG</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Code</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Code</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Typ</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Typ</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Name</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Name</th>
|
||||||
@@ -2017,17 +2198,10 @@ export default function RAGPage() {
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody className="divide-y">
|
<tbody className="divide-y">
|
||||||
{REGULATIONS.map((reg) => {
|
{REGULATIONS.map((reg) => {
|
||||||
const chunks = getRegulationChunks(reg.code)
|
const chunks = getKnownChunks(reg.code)
|
||||||
const ratio = chunks / (reg.expected * 10) // Rough estimate: 10 chunks per requirement
|
const inRag = isInRag(reg.code)
|
||||||
let statusColor = 'text-red-500'
|
let statusColor = inRag ? 'text-green-500' : 'text-red-500'
|
||||||
let statusIcon = '❌'
|
let statusIcon = inRag ? '✓' : '❌'
|
||||||
if (ratio > 0.5) {
|
|
||||||
statusColor = 'text-green-500'
|
|
||||||
statusIcon = '✓'
|
|
||||||
} else if (ratio > 0.1) {
|
|
||||||
statusColor = 'text-yellow-500'
|
|
||||||
statusIcon = '⚠'
|
|
||||||
}
|
|
||||||
const isExpanded = expandedRegulation === reg.code
|
const isExpanded = expandedRegulation === reg.code
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -2036,6 +2210,13 @@ export default function RAGPage() {
|
|||||||
onClick={() => setExpandedRegulation(isExpanded ? null : reg.code)}
|
onClick={() => setExpandedRegulation(isExpanded ? null : reg.code)}
|
||||||
className="hover:bg-slate-50 cursor-pointer transition-colors"
|
className="hover:bg-slate-50 cursor-pointer transition-colors"
|
||||||
>
|
>
|
||||||
|
<td className="px-4 py-3 text-center">
|
||||||
|
{isInRag(reg.code) ? (
|
||||||
|
<span className="inline-flex items-center justify-center w-6 h-6 bg-green-100 text-green-600 rounded-full text-xs font-bold" title="Im RAG vorhanden">✓</span>
|
||||||
|
) : (
|
||||||
|
<span className="inline-flex items-center justify-center w-6 h-6 bg-red-50 text-red-400 rounded-full text-xs font-bold" title="Nicht im RAG">✗</span>
|
||||||
|
)}
|
||||||
|
</td>
|
||||||
<td className="px-4 py-3 font-mono font-medium text-teal-600">
|
<td className="px-4 py-3 font-mono font-medium text-teal-600">
|
||||||
<span className="inline-flex items-center gap-2">
|
<span className="inline-flex items-center gap-2">
|
||||||
<span className={`transform transition-transform ${isExpanded ? 'rotate-90' : ''}`}>▶</span>
|
<span className={`transform transition-transform ${isExpanded ? 'rotate-90' : ''}`}>▶</span>
|
||||||
@@ -2054,7 +2235,7 @@ export default function RAGPage() {
|
|||||||
</tr>
|
</tr>
|
||||||
{isExpanded && (
|
{isExpanded && (
|
||||||
<tr key={`${reg.code}-detail`} className="bg-slate-50">
|
<tr key={`${reg.code}-detail`} className="bg-slate-50">
|
||||||
<td colSpan={6} className="px-4 py-4">
|
<td colSpan={7} className="px-4 py-4">
|
||||||
<div className="bg-white rounded-lg border border-slate-200 p-4 space-y-3">
|
<div className="bg-white rounded-lg border border-slate-200 p-4 space-y-3">
|
||||||
<div>
|
<div>
|
||||||
<h4 className="font-semibold text-slate-900 mb-1">{reg.fullName}</h4>
|
<h4 className="font-semibold text-slate-900 mb-1">{reg.fullName}</h4>
|
||||||
@@ -2232,7 +2413,7 @@ export default function RAGPage() {
|
|||||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||||
<p className="text-sm text-emerald-600 font-medium">Chunks</p>
|
<p className="text-sm text-emerald-600 font-medium">Chunks</p>
|
||||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
<p className="text-2xl font-bold text-slate-900">7.996</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||||
<p className="text-sm text-emerald-600 font-medium">Vector Size</p>
|
<p className="text-sm text-emerald-600 font-medium">Vector Size</p>
|
||||||
@@ -2264,7 +2445,7 @@ export default function RAGPage() {
|
|||||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||||
<p className="text-sm text-orange-600 font-medium">Chunks</p>
|
<p className="text-sm text-orange-600 font-medium">Chunks</p>
|
||||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
<p className="text-2xl font-bold text-slate-900">7.689</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||||
<p className="text-sm text-orange-600 font-medium">Vector Size</p>
|
<p className="text-sm text-orange-600 font-medium">Vector Size</p>
|
||||||
@@ -2332,20 +2513,28 @@ export default function RAGPage() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
|
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
|
||||||
{regs.map((reg) => (
|
{regs.map((reg) => {
|
||||||
|
const regInRag = isInRag(reg.code)
|
||||||
|
return (
|
||||||
<div
|
<div
|
||||||
key={reg.code}
|
key={reg.code}
|
||||||
className="bg-white p-3 rounded-lg border border-slate-200"
|
className={`bg-white p-3 rounded-lg border ${regInRag ? 'border-green-200' : 'border-slate-200'}`}
|
||||||
>
|
>
|
||||||
<div className="flex items-center gap-2 mb-1">
|
<div className="flex items-center gap-2 mb-1">
|
||||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
||||||
{reg.code}
|
{reg.code}
|
||||||
</span>
|
</span>
|
||||||
|
{regInRag ? (
|
||||||
|
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-green-100 text-green-600 rounded">RAG</span>
|
||||||
|
) : (
|
||||||
|
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-red-50 text-red-400 rounded">✗</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div className="font-medium text-sm text-slate-900">{reg.name}</div>
|
<div className="font-medium text-sm text-slate-900">{reg.name}</div>
|
||||||
<div className="text-xs text-slate-500 mt-1 line-clamp-2">{reg.description}</div>
|
<div className="text-xs text-slate-500 mt-1 line-clamp-2">{reg.description}</div>
|
||||||
</div>
|
</div>
|
||||||
))}
|
)
|
||||||
|
})}
|
||||||
</div>
|
</div>
|
||||||
</>
|
</>
|
||||||
)
|
)
|
||||||
@@ -2372,17 +2561,22 @@ export default function RAGPage() {
|
|||||||
<div className="flex flex-wrap gap-2">
|
<div className="flex flex-wrap gap-2">
|
||||||
{group.regulations.map((code) => {
|
{group.regulations.map((code) => {
|
||||||
const reg = REGULATIONS.find(r => r.code === code)
|
const reg = REGULATIONS.find(r => r.code === code)
|
||||||
|
const codeInRag = isInRag(code)
|
||||||
return (
|
return (
|
||||||
<span
|
<span
|
||||||
key={code}
|
key={code}
|
||||||
className="px-3 py-1.5 bg-slate-100 rounded-full text-sm font-medium text-slate-700 hover:bg-slate-200 cursor-pointer"
|
className={`px-3 py-1.5 rounded-full text-sm font-medium cursor-pointer ${
|
||||||
|
codeInRag
|
||||||
|
? 'bg-green-100 text-green-700 hover:bg-green-200'
|
||||||
|
: 'bg-slate-100 text-slate-700 hover:bg-slate-200'
|
||||||
|
}`}
|
||||||
onClick={() => {
|
onClick={() => {
|
||||||
setActiveTab('regulations')
|
setActiveTab('regulations')
|
||||||
setExpandedRegulation(code)
|
setExpandedRegulation(code)
|
||||||
}}
|
}}
|
||||||
title={reg?.fullName || code}
|
title={`${reg?.fullName || code}${codeInRag ? ' (im RAG)' : ' (nicht im RAG)'}`}
|
||||||
>
|
>
|
||||||
{code}
|
{codeInRag ? '✓ ' : '✗ '}{code}
|
||||||
</span>
|
</span>
|
||||||
)
|
)
|
||||||
})}
|
})}
|
||||||
@@ -2406,9 +2600,13 @@ export default function RAGPage() {
|
|||||||
{intersection.regulations.map((code) => (
|
{intersection.regulations.map((code) => (
|
||||||
<span
|
<span
|
||||||
key={code}
|
key={code}
|
||||||
className="px-2 py-0.5 text-xs font-medium bg-teal-100 text-teal-700 rounded"
|
className={`px-2 py-0.5 text-xs font-medium rounded ${
|
||||||
|
isInRag(code)
|
||||||
|
? 'bg-green-100 text-green-700'
|
||||||
|
: 'bg-red-50 text-red-500'
|
||||||
|
}`}
|
||||||
>
|
>
|
||||||
{code}
|
{isInRag(code) ? '✓ ' : '✗ '}{code}
|
||||||
</span>
|
</span>
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
@@ -2443,8 +2641,15 @@ export default function RAGPage() {
|
|||||||
<tbody className="divide-y">
|
<tbody className="divide-y">
|
||||||
{REGULATIONS.map((reg) => (
|
{REGULATIONS.map((reg) => (
|
||||||
<tr key={reg.code} className="hover:bg-slate-50">
|
<tr key={reg.code} className="hover:bg-slate-50">
|
||||||
<td className="px-2 py-2 font-medium text-teal-600 sticky left-0 bg-white">
|
<td className="px-2 py-2 font-medium sticky left-0 bg-white">
|
||||||
{reg.code}
|
<span className="flex items-center gap-1">
|
||||||
|
{isInRag(reg.code) ? (
|
||||||
|
<span className="text-green-500 text-[10px]">●</span>
|
||||||
|
) : (
|
||||||
|
<span className="text-red-300 text-[10px]">○</span>
|
||||||
|
)}
|
||||||
|
<span className="text-teal-600">{reg.code}</span>
|
||||||
|
</span>
|
||||||
</td>
|
</td>
|
||||||
{INDUSTRIES.filter(i => i.id !== 'all').map((industry) => {
|
{INDUSTRIES.filter(i => i.id !== 'all').map((industry) => {
|
||||||
const applies = INDUSTRY_REGULATION_MAP[industry.id]?.includes(reg.code)
|
const applies = INDUSTRY_REGULATION_MAP[industry.id]?.includes(reg.code)
|
||||||
@@ -2531,27 +2736,33 @@ export default function RAGPage() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Integrated Regulations */}
|
{/* RAG Coverage Overview */}
|
||||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||||
<div className="flex items-center gap-3 mb-4">
|
<div className="flex items-center gap-3 mb-4">
|
||||||
<span className="text-2xl">✅</span>
|
<span className="text-2xl">✅</span>
|
||||||
<div>
|
<div>
|
||||||
<h3 className="font-semibold text-slate-900">Neu integrierte Regulierungen</h3>
|
<h3 className="font-semibold text-slate-900">RAG-Abdeckung ({Object.keys(REGULATIONS_IN_RAG).length} von {REGULATIONS.length} Regulierungen)</h3>
|
||||||
<p className="text-sm text-slate-500">Jetzt im RAG-System verfuegbar (Stand: Januar 2025)</p>
|
<p className="text-sm text-slate-500">Stand: Februar 2026 — Alle im RAG-System verfuegbaren Regulierungen</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="grid grid-cols-2 md:grid-cols-5 gap-3">
|
<div className="flex flex-wrap gap-2">
|
||||||
{INTEGRATED_REGULATIONS.map((reg) => (
|
{REGULATIONS.filter(r => isInRag(r.code)).map((reg) => (
|
||||||
<div key={reg.code} className="rounded-lg border border-green-200 bg-green-50 p-3 text-center">
|
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-green-100 text-green-700 rounded-full border border-green-200">
|
||||||
<span className="px-2 py-1 text-sm font-bold bg-green-100 text-green-700 rounded">
|
✓ {reg.code}
|
||||||
{reg.code}
|
|
||||||
</span>
|
</span>
|
||||||
<p className="text-xs text-slate-600 mt-2">{reg.name}</p>
|
|
||||||
<p className="text-xs text-green-600 mt-1">Im RAG</p>
|
|
||||||
</div>
|
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
<div className="mt-4 pt-4 border-t border-slate-100">
|
||||||
|
<p className="text-xs font-medium text-slate-500 mb-2">Noch nicht im RAG:</p>
|
||||||
|
<div className="flex flex-wrap gap-2">
|
||||||
|
{REGULATIONS.filter(r => !isInRag(r.code)).map((reg) => (
|
||||||
|
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-red-50 text-red-400 rounded-full border border-red-100">
|
||||||
|
✗ {reg.code}
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Potential Future Regulations */}
|
{/* Potential Future Regulations */}
|
||||||
@@ -2899,7 +3110,7 @@ export default function RAGPage() {
|
|||||||
<span className="flex items-center gap-2 text-teal-600">
|
<span className="flex items-center gap-2 text-teal-600">
|
||||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||||
</svg>
|
</svg>
|
||||||
Ingestion laeuft...
|
Ingestion laeuft...
|
||||||
</span>
|
</span>
|
||||||
@@ -2969,7 +3180,7 @@ export default function RAGPage() {
|
|||||||
{pipelineStarting ? (
|
{pipelineStarting ? (
|
||||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||||
</svg>
|
</svg>
|
||||||
) : (
|
) : (
|
||||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
@@ -2988,7 +3199,7 @@ export default function RAGPage() {
|
|||||||
{pipelineLoading ? (
|
{pipelineLoading ? (
|
||||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||||
</svg>
|
</svg>
|
||||||
) : (
|
) : (
|
||||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
@@ -3021,7 +3232,7 @@ export default function RAGPage() {
|
|||||||
<>
|
<>
|
||||||
<svg className="animate-spin h-5 w-5" fill="none" viewBox="0 0 24 24">
|
<svg className="animate-spin h-5 w-5" fill="none" viewBox="0 0 24 24">
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||||
</svg>
|
</svg>
|
||||||
Startet...
|
Startet...
|
||||||
</>
|
</>
|
||||||
@@ -3058,7 +3269,7 @@ export default function RAGPage() {
|
|||||||
{pipelineState.status === 'running' && (
|
{pipelineState.status === 'running' && (
|
||||||
<svg className="w-6 h-6 text-blue-600 animate-spin" fill="none" viewBox="0 0 24 24">
|
<svg className="w-6 h-6 text-blue-600 animate-spin" fill="none" viewBox="0 0 24 24">
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||||
</svg>
|
</svg>
|
||||||
)}
|
)}
|
||||||
{pipelineState.status === 'failed' && (
|
{pipelineState.status === 'failed' && (
|
||||||
|
|||||||
320
admin-lehrer/components/ocr-pipeline/ColumnControls.tsx
Normal file
320
admin-lehrer/components/ocr-pipeline/ColumnControls.tsx
Normal file
@@ -0,0 +1,320 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState, useMemo } from 'react'
|
||||||
|
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Props of the ColumnControls panel. */
interface ColumnControlsProps {
  // --- data -----------------------------------------------------------------
  /** Detection result to show; the panel renders nothing while this is null. */
  columnResult: ColumnResult | null
  /** Regions diffed against the detected columns; when null the diff table is hidden. */
  savedGtColumns: PageRegion[] | null
  /** While true, the "Erneut erkennen" button is disabled. */
  isDetecting: boolean

  // --- callbacks --------------------------------------------------------------
  /** Click handler of the "Erneut erkennen" button. */
  onRerun: () => void
  /** Click handler of the "Manuell markieren" button. */
  onManualMode: () => void
  /** Click handler of the "Ground Truth eintragen / bearbeiten" button. */
  onGtMode: () => void
  /** Receives `{ is_correct }` when the user answers "Ja" or "Nein". */
  onGroundTruth: (gt: ColumnGroundTruth) => void
  /** Click handler of the "Weiter" button. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
// Header and footer regions share one neutral badge style.
const NEUTRAL_REGION_BADGE = 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400'

/** Tailwind badge classes (light + dark variants) keyed by region type. */
const TYPE_COLORS: Record<string, string> = {
  column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
  column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
  column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
  column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
  page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
  column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
  column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
  header: NEUTRAL_REGION_BADGE,
  footer: NEUTRAL_REGION_BADGE,
}

/** Short badge text keyed by region type; callers fall back to the raw type. */
const TYPE_LABELS: Record<string, string> = {
  column_en: 'EN',
  column_de: 'DE',
  column_example: 'Beispiel',
  column_text: 'Text',
  page_ref: 'Seite',
  column_marker: 'Marker',
  column_ignore: 'Ignorieren',
  header: 'Header',
  footer: 'Footer',
}

/** Display names for the `classification_method` value of a column. */
const METHOD_LABELS: Record<string, string> = {
  content: 'Inhalt',
  position_enhanced: 'Position',
  position_fallback: 'Fallback',
}
|
||||||
|
|
||||||
|
/** One line of the "Auto vs Ground Truth" comparison table. */
interface DiffRow {
  /** 1-based position of the row in the table. */
  index: number

  /** Automatically detected column, or null when only the GT column exists. */
  autoCol: PageRegion | null
  /** Ground-truth column, or null when only the auto column exists. */
  gtCol: PageRegion | null

  /** `gtCol.x - autoCol.x` for matched pairs, otherwise null. */
  diffX: number | null
  /** `gtCol.width - autoCol.width` for matched pairs, otherwise null. */
  diffW: number | null

  /** True when both columns exist and their `type` differs. */
  typeMismatch: boolean
}
|
||||||
|
|
||||||
|
/**
 * Intersection-over-union of two regions projected onto the X axis.
 * Returns 0 when the regions do not overlap or both have zero width.
 */
function xAxisIoU(a: PageRegion, b: PageRegion): number {
  const overlap = Math.max(0, Math.min(a.x + a.width, b.x + b.width) - Math.max(a.x, b.x))
  const union = a.width + b.width - overlap
  return union > 0 ? overlap / union : 0
}

/**
 * Pair automatically detected columns with ground-truth (GT) columns.
 *
 * Matching is greedy in input order: each auto column claims the still
 * unclaimed GT column with the highest X-axis IoU, provided that IoU is
 * strictly greater than `minIoU`. Columns with no overlap at all never match.
 *
 * The result lists matched pairs first, then auto columns without a GT
 * partner, then GT columns without an auto partner. `index` is 1-based over
 * that combined list.
 *
 * @param autoCols columns produced by the automatic detection
 * @param gtCols   ground-truth columns
 * @param minIoU   exclusive IoU threshold for accepting a pair (default 0.3)
 * @returns one DiffRow per matched pair and per unmatched column
 */
function computeDiff(autoCols: PageRegion[], gtCols: PageRegion[], minIoU = 0.3): DiffRow[] {
  const rows: DiffRow[] = []
  const usedGt = new Set<number>()
  const usedAuto = new Set<number>()

  // Pass 1: auto -> GT by best X-axis overlap.
  autoCols.forEach((autoCol, autoIdx) => {
    let bestIdx = -1
    let bestIoU = 0 // starting at 0 means a zero-overlap candidate is never picked

    gtCols.forEach((gtCol, gtIdx) => {
      if (usedGt.has(gtIdx)) return
      const iou = xAxisIoU(autoCol, gtCol)
      if (iou > bestIoU) {
        bestIoU = iou
        bestIdx = gtIdx
      }
    })

    if (bestIdx < 0 || !(bestIoU > minIoU)) return

    usedGt.add(bestIdx)
    usedAuto.add(autoIdx)
    const gtCol = gtCols[bestIdx]
    rows.push({
      index: rows.length + 1,
      autoCol,
      gtCol,
      diffX: gtCol.x - autoCol.x,
      diffW: gtCol.width - autoCol.width,
      typeMismatch: autoCol.type !== gtCol.type,
    })
  })

  // Pass 2: auto columns that found no partner.
  autoCols.forEach((autoCol, autoIdx) => {
    if (usedAuto.has(autoIdx)) return
    rows.push({ index: rows.length + 1, autoCol, gtCol: null, diffX: null, diffW: null, typeMismatch: false })
  })

  // Pass 3: GT columns nobody claimed.
  gtCols.forEach((gtCol, gtIdx) => {
    if (usedGt.has(gtIdx)) return
    rows.push({ index: rows.length + 1, autoCol: null, gtCol, diffX: null, diffW: null, typeMismatch: false })
  })

  return rows
}
|
||||||
|
|
||||||
|
/** True for regions listed as columns: every `column*` type plus `page_ref`. */
const isColumnRegion = (region: PageRegion): boolean =>
  region.type.startsWith('column') || region.type === 'page_ref'

/** Coloured badge showing the (translated) type of a region. */
const renderTypeBadge = (region: PageRegion) => (
  <span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[region.type] || ''}`}>
    {TYPE_LABELS[region.type] || region.type}
  </span>
)

/** Monospace "x= y= WxHpx" readout of a region. */
const renderGeometry = (region: PageRegion) => (
  <span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
    x={region.x} y={region.y} {region.width}x{region.height}px
  </span>
)

/** Diff-table cell for one side of a row; shows "fehlt" when that side is missing. */
const renderDiffRegion = (region: PageRegion | null) =>
  region ? (
    <span>
      <span className={`inline-block px-1 rounded ${TYPE_COLORS[region.type] || ''}`}>
        {TYPE_LABELS[region.type] || region.type}
      </span>
      {' '}{region.x}, {region.width}
    </span>
  ) : (
    <span className="text-red-400">fehlt</span>
  )

/** Signed delta, highlighted when its magnitude exceeds 20; an em dash when there is no delta. */
const renderDelta = (delta: number | null) =>
  delta !== null ? (
    <span className={Math.abs(delta) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
      {delta > 0 ? '+' : ''}{delta}
    </span>
  ) : '—'

/** Row background: red for missing side / type mismatch, amber for a delta above 20, else none. */
const diffRowTone = (row: DiffRow): string => {
  if (!row.autoCol || !row.gtCol || row.typeMismatch) {
    return 'bg-red-50 dark:bg-red-900/10'
  }
  const xOff = row.diffX !== null && Math.abs(row.diffX) > 20
  const wOff = row.diffW !== null && Math.abs(row.diffW) > 20
  return xOff || wOff ? 'bg-amber-50 dark:bg-amber-900/10' : ''
}

/**
 * Panel for the column-detection step: summary with re-run / manual / ground-truth
 * buttons, the list of detected regions, an optional Auto-vs-GT diff table, and the
 * "Spalten korrekt?" feedback next to the "Weiter" button.
 *
 * Renders nothing while `columnResult` is null.
 */
export function ColumnControls({
  columnResult,
  onRerun,
  onManualMode,
  onGtMode,
  onGroundTruth,
  onNext,
  isDetecting,
  savedGtColumns,
}: ColumnControlsProps) {
  const [gtSaved, setGtSaved] = useState(false)

  // Only column-like regions take part in the comparison.
  const diffRows = useMemo(
    () =>
      columnResult && savedGtColumns
        ? computeDiff(
            columnResult.columns.filter((c) => isColumnRegion(c)),
            savedGtColumns.filter((c) => isColumnRegion(c)),
          )
        : null,
    [columnResult, savedGtColumns],
  )

  if (!columnResult) return null

  // Split the detected regions into columns and everything else, keeping order.
  const columns: PageRegion[] = []
  const headerFooter: PageRegion[] = []
  for (const region of columnResult.columns) {
    if (isColumnRegion(region)) {
      columns.push(region)
    } else {
      headerFooter.push(region)
    }
  }

  const submitVerdict = (isCorrect: boolean) => {
    onGroundTruth({ is_correct: isCorrect })
    setGtSaved(true)
  }

  const hasDiff = diffRows && diffRows.length > 0

  return (
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-4">
      {/* Summary */}
      <div className="flex items-center gap-3 flex-wrap">
        <div className="text-sm text-gray-600 dark:text-gray-400">
          <span className="font-medium text-gray-800 dark:text-gray-200">{columns.length} Spalten</span> erkannt
          {columnResult.duration_seconds > 0 && (
            <span className="ml-2 text-xs">({columnResult.duration_seconds}s)</span>
          )}
        </div>
        <button
          onClick={onRerun}
          disabled={isDetecting}
          className="text-xs px-2 py-1 bg-gray-100 dark:bg-gray-700 rounded hover:bg-gray-200 dark:hover:bg-gray-600 transition-colors disabled:opacity-50"
        >
          Erneut erkennen
        </button>
        <button
          onClick={onManualMode}
          className="text-xs px-2 py-1 bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400 rounded hover:bg-teal-200 dark:hover:bg-teal-900/50 transition-colors"
        >
          Manuell markieren
        </button>
        <button
          onClick={onGtMode}
          className="text-xs px-2 py-1 bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400 rounded hover:bg-amber-200 dark:hover:bg-amber-900/50 transition-colors"
        >
          {savedGtColumns ? 'Ground Truth bearbeiten' : 'Ground Truth eintragen'}
        </button>
      </div>

      {/* Column list */}
      <div className="space-y-2">
        {columns.map((col: PageRegion, i: number) => (
          <div key={i} className="flex items-center gap-3 text-sm">
            {renderTypeBadge(col)}
            {col.classification_confidence != null && col.classification_confidence < 1.0 && (
              <span className="text-xs font-medium text-gray-600 dark:text-gray-300">
                {Math.round(col.classification_confidence * 100)}%
              </span>
            )}
            {col.classification_method && (
              <span className="text-xs text-gray-400 dark:text-gray-500">
                ({METHOD_LABELS[col.classification_method] || col.classification_method})
              </span>
            )}
            {renderGeometry(col)}
          </div>
        ))}
        {headerFooter.map((r: PageRegion, i: number) => (
          <div key={`hf-${i}`} className="flex items-center gap-3 text-sm">
            {renderTypeBadge(r)}
            {renderGeometry(r)}
          </div>
        ))}
      </div>

      {/* Diff table (Auto vs GT) */}
      {hasDiff && (
        <div className="border-t border-gray-100 dark:border-gray-700 pt-3">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
            Vergleich: Auto vs Ground Truth
          </div>
          <div className="overflow-x-auto">
            <table className="w-full text-xs">
              <thead>
                <tr className="text-gray-500 dark:text-gray-400 border-b border-gray-100 dark:border-gray-700">
                  <th className="text-left py-1 pr-2">#</th>
                  <th className="text-left py-1 pr-2">Auto (Typ, x, w)</th>
                  <th className="text-left py-1 pr-2">GT (Typ, x, w)</th>
                  <th className="text-right py-1 pr-2">Diff X</th>
                  <th className="text-right py-1">Diff W</th>
                </tr>
              </thead>
              <tbody>
                {diffRows.map((row) => (
                  <tr key={row.index} className={diffRowTone(row)}>
                    <td className="py-1 pr-2 font-mono text-gray-400">{row.index}</td>
                    <td className="py-1 pr-2 font-mono">{renderDiffRegion(row.autoCol)}</td>
                    <td className="py-1 pr-2 font-mono">{renderDiffRegion(row.gtCol)}</td>
                    <td className="py-1 pr-2 text-right font-mono">{renderDelta(row.diffX)}</td>
                    <td className="py-1 text-right font-mono">{renderDelta(row.diffW)}</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </div>
      )}

      {/* Ground Truth + Navigation */}
      <div className="flex items-center justify-between pt-2 border-t border-gray-100 dark:border-gray-700">
        <div className="flex items-center gap-2">
          <span className="text-sm text-gray-500 dark:text-gray-400">Spalten korrekt?</span>
          {!gtSaved ? (
            <>
              <button
                onClick={() => submitVerdict(true)}
                className="text-xs px-3 py-1 bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 rounded hover:bg-green-200 dark:hover:bg-green-900/50 transition-colors"
              >
                Ja
              </button>
              <button
                onClick={() => submitVerdict(false)}
                className="text-xs px-3 py-1 bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400 rounded hover:bg-red-200 dark:hover:bg-red-900/50 transition-colors"
              >
                Nein
              </button>
            </>
          ) : (
            <span className="text-xs text-green-600 dark:text-green-400">Gespeichert</span>
          )}
        </div>

        <button
          onClick={onNext}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
        >
          Weiter
        </button>
      </div>
    </div>
  )
}
|
||||||
209
admin-lehrer/components/ocr-pipeline/DeskewControls.tsx
Normal file
209
admin-lehrer/components/ocr-pipeline/DeskewControls.tsx
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState } from 'react'
|
||||||
|
import type { DeskewResult, DeskewGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Props of the DeskewControls panel. */
interface DeskewControlsProps {
  // --- data -----------------------------------------------------------------
  /** Deskew result to show; no cards are rendered while this is null. */
  deskewResult: DeskewResult | null
  /** Highlights the "Binarisiert anzeigen" toggle when true. */
  showBinarized: boolean
  /** Highlights the "Raster anzeigen" toggle when true. */
  showGrid: boolean
  /** While true, the "Anwenden" button is disabled and shows "...". */
  isApplying: boolean

  // --- callbacks --------------------------------------------------------------
  /** Click handler of the "Binarisiert anzeigen" toggle. */
  onToggleBinarized: () => void
  /** Click handler of the "Raster anzeigen" toggle. */
  onToggleGrid: () => void
  /** Called with the slider value (degrees, -5 to 5) when "Anwenden" is clicked. */
  onManualDeskew: (angle: number) => void
  /** Receives the user's verdict on the rotation. */
  onGroundTruth: (gt: DeskewGroundTruth) => void
  /** Click handler of the "Uebernehmen & Weiter" button. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/** Display names for `deskewResult.method_used`; unknown methods are shown verbatim by the caller. */
const METHOD_LABELS: Record<string, string> = {
  hough: 'Hough-Linien',
  word_alignment: 'Wortausrichtung',
  manual: 'Manuell',
}
|
||||||
|
|
||||||
|
/**
 * Controls for the deskew step: result summary with display toggles, a manual
 * angle slider (-5 to +5 degrees in 0.1 steps), a "Rotation korrekt?" feedback
 * card and the button that advances to the next step.
 *
 * While `deskewResult` is null only the empty wrapper element is rendered.
 */
export function DeskewControls(props: DeskewControlsProps) {
  const {
    deskewResult,
    showBinarized,
    onToggleBinarized,
    showGrid,
    onToggleGrid,
    onManualDeskew,
    onGroundTruth,
    onNext,
    isApplying,
  } = props

  const [manualAngle, setManualAngle] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)

  // Every card below depends on a result, so bail out with the bare wrapper.
  if (!deskewResult) {
    return <div className="space-y-4" />
  }

  const cardClass = 'bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4'

  const toggleClass = (active: boolean) =>
    `text-xs px-3 py-1 rounded-full border transition-colors ${
      active
        ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
        : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
    }`

  const verdictClass = (selected: boolean, selectedTone: string, idleTone: string) =>
    `px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${selected ? selectedTone : idleTone}`

  const separator = () => <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />

  // "Ja" is saved immediately.
  const confirmCorrect = () => {
    setGtFeedback('correct')
    onGroundTruth({ is_correct: true })
    setGtSaved(true)
  }

  // "Nein" only opens the notes form; saving happens in submitIncorrect.
  const markIncorrect = () => {
    setGtFeedback('incorrect')
  }

  const submitIncorrect = () => {
    onGroundTruth({
      is_correct: false,
      // A slider left at 0 is reported as "no corrected angle".
      corrected_angle: manualAngle !== 0 ? manualAngle : undefined,
      notes: gtNotes || undefined,
    })
    setGtSaved(true)
  }

  return (
    <div className="space-y-4">
      {/* Results */}
      <div className={cardClass}>
        <div className="flex flex-wrap items-center gap-3 text-sm">
          <div>
            <span className="text-gray-500">Winkel:</span>{' '}
            <span className="font-mono font-medium">{deskewResult.angle_applied}°</span>
          </div>
          {separator()}
          <div>
            <span className="text-gray-500">Methode:</span>{' '}
            <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
              {METHOD_LABELS[deskewResult.method_used] || deskewResult.method_used}
            </span>
          </div>
          {separator()}
          <div>
            <span className="text-gray-500">Konfidenz:</span>{' '}
            <span className="font-mono">{Math.round(deskewResult.confidence * 100)}%</span>
          </div>
          {separator()}
          <div className="text-gray-400 text-xs">
            Hough: {deskewResult.angle_hough}° | WA: {deskewResult.angle_word_alignment}°
          </div>
        </div>

        {/* Toggles */}
        <div className="flex gap-3 mt-3">
          <button onClick={onToggleBinarized} className={toggleClass(showBinarized)}>
            Binarisiert anzeigen
          </button>
          <button onClick={onToggleGrid} className={toggleClass(showGrid)}>
            Raster anzeigen
          </button>
        </div>
      </div>

      {/* Manual angle */}
      <div className={cardClass}>
        <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Manuelle Korrektur</div>
        <div className="flex items-center gap-3">
          <span className="text-xs text-gray-400 w-8 text-right">-5°</span>
          <input
            type="range"
            min={-5}
            max={5}
            step={0.1}
            value={manualAngle}
            onChange={(e) => setManualAngle(parseFloat(e.target.value))}
            className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
          />
          <span className="text-xs text-gray-400 w-8">+5°</span>
          <span className="font-mono text-sm w-14 text-right">{manualAngle.toFixed(1)}°</span>
          <button
            onClick={() => onManualDeskew(manualAngle)}
            disabled={isApplying}
            className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
          >
            {isApplying ? '...' : 'Anwenden'}
          </button>
        </div>
      </div>

      {/* Ground Truth */}
      <div className={cardClass}>
        <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
          Rotation korrekt?
        </div>
        <p className="text-xs text-gray-400 mb-2">Nur die Drehung bewerten — Woelbung/Verzerrung wird im naechsten Schritt korrigiert.</p>
        {gtSaved ? (
          <div className="text-sm text-green-600 dark:text-green-400">
            Feedback gespeichert
          </div>
        ) : (
          <div className="space-y-3">
            <div className="flex gap-2">
              <button
                onClick={() => confirmCorrect()}
                className={verdictClass(
                  gtFeedback === 'correct',
                  'bg-green-100 text-green-700 ring-2 ring-green-400',
                  'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300',
                )}
              >
                Ja
              </button>
              <button
                onClick={() => markIncorrect()}
                className={verdictClass(
                  gtFeedback === 'incorrect',
                  'bg-red-100 text-red-700 ring-2 ring-red-400',
                  'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300',
                )}
              >
                Nein
              </button>
            </div>
            {gtFeedback === 'incorrect' && (
              <div className="space-y-2">
                <textarea
                  value={gtNotes}
                  onChange={(e) => setGtNotes(e.target.value)}
                  placeholder="Notizen zur Korrektur..."
                  className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
                  rows={2}
                />
                <button
                  onClick={submitIncorrect}
                  className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
                >
                  Feedback speichern
                </button>
              </div>
            )}
          </div>
        )}
      </div>

      {/* Next button */}
      <div className="flex justify-end">
        <button
          onClick={onNext}
          className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
        >
          Uebernehmen & Weiter →
        </button>
      </div>
    </div>
  )
}
|
||||||
201
admin-lehrer/components/ocr-pipeline/DewarpControls.tsx
Normal file
201
admin-lehrer/components/ocr-pipeline/DewarpControls.tsx
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useEffect, useState } from 'react'
|
||||||
|
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Props of the DewarpControls panel. */
interface DewarpControlsProps {
  // --- data -----------------------------------------------------------------
  /** Dewarp result to show; no cards are rendered while this is null. */
  dewarpResult: DewarpResult | null
  /** Highlights the "Raster anzeigen" toggle when true. */
  showGrid: boolean
  /** While true, the "Anwenden" button is disabled and shows "...". */
  isApplying: boolean

  // --- callbacks --------------------------------------------------------------
  /** Click handler of the "Raster anzeigen" toggle. */
  onToggleGrid: () => void
  /** Called with the slider's shear angle in degrees when "Anwenden" is clicked. */
  onManualDewarp: (shearDegrees: number) => void
  /** Receives the user's verdict on the vertical alignment. */
  onGroundTruth: (gt: DewarpGroundTruth) => void
  /** Click handler of the "Uebernehmen & Weiter" button. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/** Display names for `dewarpResult.method_used`; unknown methods are shown verbatim by the caller. */
const METHOD_LABELS: Record<string, string> = {
  vertical_edge: 'Vertikale Kanten',
  manual: 'Manuell',
  none: 'Keine Korrektur',
}
|
||||||
|
|
||||||
|
/**
 * Controls for the dewarp step: result summary with grid toggle, a manual shear
 * slider (-2.00 to +2.00 degrees in 0.05 steps), a "Spalten vertikal
 * ausgerichtet?" feedback card and the button that advances to the next step.
 *
 * While `dewarpResult` is null only the empty wrapper element is rendered.
 */
export function DewarpControls(props: DewarpControlsProps) {
  const {
    dewarpResult,
    showGrid,
    onToggleGrid,
    onManualDewarp,
    onGroundTruth,
    onNext,
    isApplying,
  } = props

  const [manualShear, setManualShear] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)

  // Move the slider to the auto-detected shear whenever that value changes.
  const detectedShear = dewarpResult?.shear_degrees
  useEffect(() => {
    if (detectedShear !== undefined) {
      setManualShear(detectedShear)
    }
  }, [detectedShear])

  // Every card below depends on a result, so bail out with the bare wrapper.
  if (!dewarpResult) {
    return <div className="space-y-4" />
  }

  const cardClass = 'bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4'

  const gridToggleClass = `text-xs px-3 py-1 rounded-full border transition-colors ${
    showGrid
      ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
      : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
  }`

  const verdictClass = (selected: boolean, selectedTone: string, idleTone: string) =>
    `px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${selected ? selectedTone : idleTone}`

  const separator = () => <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />

  // "Ja" is saved immediately.
  const confirmCorrect = () => {
    setGtFeedback('correct')
    onGroundTruth({ is_correct: true })
    setGtSaved(true)
  }

  // "Nein" only opens the notes form; saving happens in submitIncorrect.
  const markIncorrect = () => {
    setGtFeedback('incorrect')
  }

  const submitIncorrect = () => {
    onGroundTruth({
      is_correct: false,
      // A slider value of exactly 0 is reported as "no corrected shear".
      corrected_shear: manualShear !== 0 ? manualShear : undefined,
      notes: gtNotes || undefined,
    })
    setGtSaved(true)
  }

  return (
    <div className="space-y-4">
      {/* Results */}
      <div className={cardClass}>
        <div className="flex flex-wrap items-center gap-3 text-sm">
          <div>
            <span className="text-gray-500">Scherung:</span>{' '}
            <span className="font-mono font-medium">{dewarpResult.shear_degrees}°</span>
          </div>
          {separator()}
          <div>
            <span className="text-gray-500">Methode:</span>{' '}
            <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
              {METHOD_LABELS[dewarpResult.method_used] || dewarpResult.method_used}
            </span>
          </div>
          {separator()}
          <div>
            <span className="text-gray-500">Konfidenz:</span>{' '}
            <span className="font-mono">{Math.round(dewarpResult.confidence * 100)}%</span>
          </div>
        </div>

        {/* Toggle */}
        <div className="flex gap-3 mt-3">
          <button onClick={onToggleGrid} className={gridToggleClass}>
            Raster anzeigen
          </button>
        </div>
      </div>

      {/* Manual shear angle slider (works in hundredths of a degree) */}
      <div className={cardClass}>
        <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Scherwinkel (manuell)</div>
        <div className="flex items-center gap-3">
          <span className="text-xs text-gray-400 w-10 text-right">-2.0°</span>
          <input
            type="range"
            min={-200}
            max={200}
            step={5}
            value={Math.round(manualShear * 100)}
            onChange={(e) => setManualShear(parseInt(e.target.value) / 100)}
            className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
          />
          <span className="text-xs text-gray-400 w-10">+2.0°</span>
          <span className="font-mono text-sm w-16 text-right">{manualShear.toFixed(2)}°</span>
          <button
            onClick={() => onManualDewarp(manualShear)}
            disabled={isApplying}
            className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
          >
            {isApplying ? '...' : 'Anwenden'}
          </button>
        </div>
        <p className="text-xs text-gray-400 mt-1">
          Scherung der vertikalen Achse in Grad. Positiv = Spalten nach rechts kippen, negativ = nach links.
        </p>
      </div>

      {/* Ground Truth */}
      <div className={cardClass}>
        <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
          Spalten vertikal ausgerichtet?
        </div>
        <p className="text-xs text-gray-400 mb-2">Pruefen ob die Spaltenraender jetzt senkrecht zum Raster stehen.</p>
        {gtSaved ? (
          <div className="text-sm text-green-600 dark:text-green-400">
            Feedback gespeichert
          </div>
        ) : (
          <div className="space-y-3">
            <div className="flex gap-2">
              <button
                onClick={() => confirmCorrect()}
                className={verdictClass(
                  gtFeedback === 'correct',
                  'bg-green-100 text-green-700 ring-2 ring-green-400',
                  'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300',
                )}
              >
                Ja
              </button>
              <button
                onClick={() => markIncorrect()}
                className={verdictClass(
                  gtFeedback === 'incorrect',
                  'bg-red-100 text-red-700 ring-2 ring-red-400',
                  'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300',
                )}
              >
                Nein
              </button>
            </div>
            {gtFeedback === 'incorrect' && (
              <div className="space-y-2">
                <textarea
                  value={gtNotes}
                  onChange={(e) => setGtNotes(e.target.value)}
                  placeholder="Notizen zur Korrektur..."
                  className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
                  rows={2}
                />
                <button
                  onClick={submitIncorrect}
                  className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
                >
                  Feedback speichern
                </button>
              </div>
            )}
          </div>
        )}
      </div>

      {/* Next button */}
      <div className="flex justify-end">
        <button
          onClick={onNext}
          className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
        >
          Uebernehmen & Weiter →
        </button>
      </div>
    </div>
  )
}
|
||||||
143
admin-lehrer/components/ocr-pipeline/ImageCompareView.tsx
Normal file
143
admin-lehrer/components/ocr-pipeline/ImageCompareView.tsx
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState } from 'react'
|
||||||
|
|
||||||
|
// A4 page size in millimetres; the grid overlay spaces its lines against these.
const A4_WIDTH_MM = 210
const A4_HEIGHT_MM = 297
|
||||||
|
|
||||||
|
/**
 * Props of ImageCompareView.
 * NOTE(review): the field notes below are inferred from the names only —
 * confirm against the component body.
 */
interface ImageCompareViewProps {
  // --- image sources (null presumably means "not available yet") ---------------
  originalUrl: string | null
  deskewedUrl: string | null
  binarizedUrl: string | null

  // --- display switches ---------------------------------------------------------
  showGrid: boolean
  /** Optional; presumably controls the grid on the left image separately. */
  showGridLeft?: boolean
  showBinarized: boolean

  // --- optional captions --------------------------------------------------------
  leftLabel?: string
  rightLabel?: string
}
|
||||||
|
|
||||||
|
/**
 * SVG overlay that draws a millimetre grid sized for an A4 page.
 *
 * The SVG uses a 0..100 coordinate space in both directions and is stretched
 * over its positioned parent (`preserveAspectRatio="none"`), so positions are
 * expressed as percentages of the A4 width/height. A line is drawn every
 * 10 mm; every 50 mm the line is stronger and (except at 0) labelled with its
 * millimetre value. The overlay never intercepts pointer events.
 */
function MmGridOverlay() {
  const MAJOR_STROKE = 'rgba(59, 130, 246, 0.4)'
  const MINOR_STROKE = 'rgba(59, 130, 246, 0.15)'
  const LABEL_FILL = 'rgba(59,130,246,0.6)'

  const nodes: React.ReactNode[] = []

  // Emits one axis worth of grid lines (and labels) in ascending mm order.
  const addAxis = (vertical: boolean, lengthMm: number) => {
    for (let mm = 0; mm <= lengthMm; mm += 10) {
      const pos = (mm / lengthMm) * 100
      const major = mm % 50 === 0
      const stroke = major ? MAJOR_STROKE : MINOR_STROKE
      const strokeWidth = major ? 0.12 : 0.05

      if (vertical) {
        nodes.push(
          <line
            key={`v-${mm}`}
            x1={pos} y1={0} x2={pos} y2={100}
            stroke={stroke}
            strokeWidth={strokeWidth}
          />
        )
      } else {
        nodes.push(
          <line
            key={`h-${mm}`}
            x1={0} y1={pos} x2={100} y2={pos}
            stroke={stroke}
            strokeWidth={strokeWidth}
          />
        )
      }

      // Only the 50 mm lines carry a label; the origin is left unlabelled.
      if (!major || mm === 0) continue

      if (vertical) {
        nodes.push(
          <text key={`vl-${mm}`} x={pos} y={1.2} fill={LABEL_FILL} fontSize="1.2" textAnchor="middle">
            {mm}
          </text>
        )
      } else {
        nodes.push(
          <text key={`hl-${mm}`} x={0.5} y={pos + 0.6} fill={LABEL_FILL} fontSize="1.2">
            {mm}
          </text>
        )
      }
    }
  }

  // Vertical lines first, then horizontal ones (paint order matters in SVG).
  addAxis(true, A4_WIDTH_MM)
  addAxis(false, A4_HEIGHT_MM)

  return (
    <svg
      viewBox="0 0 100 100"
      preserveAspectRatio="none"
      className="absolute inset-0 w-full h-full pointer-events-none"
      style={{ zIndex: 10 }}
    >
      <g style={{ pointerEvents: 'none' }}>{nodes}</g>
    </svg>
  )
}
|
||||||
|
|
||||||
|
/**
 * Shows two images next to each other: the original scan on the left and the
 * processed (deskewed or binarized) image on the right, each optionally
 * covered by a millimetre grid.
 *
 * Load failures are tracked per URL rather than as a sticky boolean: the
 * component remembers which URL failed and only reports an error while that
 * same URL is still the one being displayed. As soon as the caller supplies a
 * different URL (new session, toggling binarized/deskewed) the image is tried
 * again instead of staying stuck on the error placeholder.
 *
 * @param originalUrl   Left image, or null while none is available.
 * @param deskewedUrl   Right image when the binarized one is not selected.
 * @param showGrid      Overlay the mm grid on the right image.
 * @param showGridLeft  Overlay the mm grid on the left image.
 * @param showBinarized Prefer `binarizedUrl` on the right when it is set.
 * @param binarizedUrl  Binarized image, or null.
 * @param leftLabel     Optional heading override for the left panel.
 * @param rightLabel    Optional heading override for the right panel.
 */
export function ImageCompareView({
  originalUrl,
  deskewedUrl,
  showGrid,
  showGridLeft,
  showBinarized,
  binarizedUrl,
  leftLabel,
  rightLabel,
}: ImageCompareViewProps) {
  // URL whose load failed most recently, per panel (null = no failure seen).
  const [failedLeftUrl, setFailedLeftUrl] = useState<string | null>(null)
  const [failedRightUrl, setFailedRightUrl] = useState<string | null>(null)

  const rightUrl = showBinarized && binarizedUrl ? binarizedUrl : deskewedUrl

  // An error only counts while the failed URL is still the displayed one.
  const leftError = Boolean(originalUrl) && failedLeftUrl === originalUrl
  const rightError = Boolean(rightUrl) && failedRightUrl === rightUrl

  return (
    <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
      {/* Left: Original */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">{leftLabel || 'Original (unbearbeitet)'}</h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
             style={{ aspectRatio: '210/297' }}>
          {originalUrl && !leftError ? (
            <>
              <img
                src={originalUrl}
                alt="Original Scan"
                className="w-full h-full object-contain"
                onError={() => setFailedLeftUrl(originalUrl)}
              />
              {showGridLeft && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {leftError ? 'Fehler beim Laden' : 'Noch kein Bild'}
            </div>
          )}
        </div>
      </div>

      {/* Right: Deskewed with Grid */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">
          {rightLabel || `${showBinarized ? 'Binarisiert' : 'Begradigt'}${showGrid ? ' + Raster (mm)' : ''}`}
        </h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
             style={{ aspectRatio: '210/297' }}>
          {rightUrl && !rightError ? (
            <>
              <img
                src={rightUrl}
                alt="Begradigtes Bild"
                className="w-full h-full object-contain"
                onError={() => setFailedRightUrl(rightUrl)}
              />
              {showGrid && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {rightError ? 'Fehler beim Laden' : 'Begradigung laeuft...'}
            </div>
          )}
        </div>
      </div>
    </div>
  )
}
|
||||||
359
admin-lehrer/components/ocr-pipeline/ManualColumnEditor.tsx
Normal file
359
admin-lehrer/components/ocr-pipeline/ManualColumnEditor.tsx
Normal file
@@ -0,0 +1,359 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||||
|
import type { ColumnTypeKey, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Entries of the per-column type dropdown: stored key plus the label shown to the user. */
const COLUMN_TYPES: { value: ColumnTypeKey; label: string }[] = [
  { value: 'column_en', label: 'EN' },
  { value: 'column_de', label: 'DE' },
  { value: 'column_example', label: 'Beispiel' },
  { value: 'column_text', label: 'Text' },
  { value: 'page_ref', label: 'Seite' },
  { value: 'column_marker', label: 'Marker' },
  { value: 'column_ignore', label: 'Ignorieren' },
]

/** Translucent fill painted over each column region on the image, keyed by column type. */
const TYPE_OVERLAY_COLORS: Record<string, string> = {
  column_en: 'rgba(59, 130, 246, 0.12)',
  column_de: 'rgba(34, 197, 94, 0.12)',
  column_example: 'rgba(249, 115, 22, 0.12)',
  column_text: 'rgba(6, 182, 212, 0.12)',
  page_ref: 'rgba(168, 85, 247, 0.12)',
  column_marker: 'rgba(239, 68, 68, 0.12)',
  column_ignore: 'rgba(128, 128, 128, 0.06)',
}

/** Tailwind classes for the "Spalte N" badge in the type list, keyed by column type. */
const TYPE_BADGE_COLORS: Record<string, string> = {
  column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
  column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
  column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
  column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
  page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
  column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
  column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
}

/**
 * Type given to a newly created column, chosen by the column's position;
 * positions beyond this list fall back to 'column_text' at the call site.
 */
const DEFAULT_TYPE_SEQUENCE: ColumnTypeKey[] = [
  'page_ref',
  'column_en',
  'column_de',
  'column_example',
  'column_text',
]

/** Minimum gap, in percent of the image width, between two dividers and between a divider and the image edge. */
const MIN_DIVIDER_DISTANCE_PERCENT = 2
|
||||||
|
|
||||||
|
/** Props of the interactive editor used to place column dividers on a page image. */
interface ManualColumnEditorProps {
  /** Image the dividers are placed on. */
  imageUrl: string
  /** Image width in pixels; divider percentages are converted to pixel columns with it. */
  imageWidth: number
  /** Image height in pixels; every produced column spans this full height. */
  imageHeight: number
  /** Receives the resulting column regions when the user confirms. */
  onApply: (columns: PageRegion[]) => void
  /** Invoked when the user presses "Abbrechen". */
  onCancel: () => void
  /** While true the apply button is disabled and shows a saving label. */
  applying: boolean
  /** Selects the wording of the apply button; defaults to 'manual'. */
  mode?: 'manual' | 'ground-truth'
  /** Image and controls side by side ('two-column', default) or below each other ('stacked'). */
  layout?: 'two-column' | 'stacked'
  /** Divider positions (percent of width) to start with. */
  initialDividers?: number[]
  /** Column types to start with, left to right. */
  initialColumnTypes?: ColumnTypeKey[]
}
|
||||||
|
|
||||||
|
/**
 * Interactive editor for vertical column dividers on a page image.
 *
 * The user clicks on the image to add a divider, drags a divider to move it
 * and hovers it to reveal a delete button. N dividers define N+1 columns,
 * each of which gets a column type from a dropdown. On apply, the dividers
 * (percent of width) are converted into full-height pixel regions and handed
 * to `onApply`.
 *
 * Invariants maintained by this component:
 * - `dividers` is always sorted ascending, so an index into the rendered
 *   list is also a valid index into state.
 * - `columnTypes` has `dividers.length + 1` entries (re-synced by an effect).
 * - The produced pixel columns tile the image width exactly: each column's
 *   right edge is the next column's left edge, with no gap or overlap.
 *
 * @param imageUrl           Image to annotate.
 * @param imageWidth         Image width in pixels.
 * @param imageHeight        Image height in pixels.
 * @param onApply            Receives the resulting columns.
 * @param onCancel           Invoked by the cancel button.
 * @param applying           Disables the apply button while a save is running.
 * @param mode               Button wording: 'manual' (default) or 'ground-truth'.
 * @param layout             'two-column' (default) or 'stacked'.
 * @param initialDividers    Starting divider positions in percent.
 * @param initialColumnTypes Starting column types, left to right.
 */
export function ManualColumnEditor({
  imageUrl,
  imageWidth,
  imageHeight,
  onApply,
  onCancel,
  applying,
  mode = 'manual',
  layout = 'two-column',
  initialDividers,
  initialColumnTypes,
}: ManualColumnEditorProps) {
  const containerRef = useRef<HTMLDivElement>(null)
  // Sort once up front: handlers address dividers by their rendered (sorted)
  // index, so state must be in the same order from the very first render.
  const [dividers, setDividers] = useState<number[]>(() =>
    [...(initialDividers ?? [])].sort((a, b) => a - b)
  )
  const [columnTypes, setColumnTypes] = useState<ColumnTypeKey[]>(initialColumnTypes ?? [])
  const [dragging, setDragging] = useState<number | null>(null)
  const [imageLoaded, setImageLoaded] = useState(false)

  const isGT = mode === 'ground-truth'

  // Sync columnTypes length when dividers change
  useEffect(() => {
    const numColumns = dividers.length + 1
    setColumnTypes(prev => {
      if (prev.length === numColumns) return prev
      const next = [...prev]
      while (next.length < numColumns) {
        const idx = next.length
        next.push(DEFAULT_TYPE_SEQUENCE[idx] || 'column_text')
      }
      while (next.length > numColumns) {
        next.pop()
      }
      return next
    })
  }, [dividers.length])

  // Converts a viewport x coordinate into a 0..100 percentage of the image width.
  const getXPercent = useCallback((clientX: number): number => {
    if (!containerRef.current) return 0
    const rect = containerRef.current.getBoundingClientRect()
    // A zero-width container would yield NaN/Infinity; treat it as "left edge".
    if (!rect.width) return 0
    const pct = ((clientX - rect.left) / rect.width) * 100
    return Math.max(0, Math.min(100, pct))
  }, [])

  // True when xPct keeps the minimum distance to every other divider and to both edges.
  const canPlaceDivider = useCallback((xPct: number, excludeIndex?: number): boolean => {
    for (let i = 0; i < dividers.length; i++) {
      if (i === excludeIndex) continue
      if (Math.abs(dividers[i] - xPct) < MIN_DIVIDER_DISTANCE_PERCENT) return false
    }
    return xPct > MIN_DIVIDER_DISTANCE_PERCENT && xPct < (100 - MIN_DIVIDER_DISTANCE_PERCENT)
  }, [dividers])

  // Click on image to add a divider
  const handleImageClick = useCallback((e: React.MouseEvent) => {
    if (dragging !== null) return
    // Don't add if clicking on a divider handle
    if ((e.target as HTMLElement).dataset.divider) return

    const xPct = getXPercent(e.clientX)
    if (!canPlaceDivider(xPct)) return

    setDividers(prev => [...prev, xPct].sort((a, b) => a - b))
  }, [dragging, getXPercent, canPlaceDivider])

  // Drag handlers
  const handleDividerMouseDown = useCallback((e: React.MouseEvent, index: number) => {
    e.stopPropagation()
    e.preventDefault()
    setDragging(index)
  }, [])

  useEffect(() => {
    if (dragging === null) return

    const handleMouseMove = (e: MouseEvent) => {
      const xPct = getXPercent(e.clientX)
      if (!canPlaceDivider(xPct, dragging)) return

      setDividers(prev => {
        // Never let the dragged divider jump past a neighbour: that would
        // reorder the array and make `dragging` point at a different divider.
        const crossesLeft = dragging > 0 && xPct <= prev[dragging - 1]
        const crossesRight = dragging < prev.length - 1 && xPct >= prev[dragging + 1]
        if (crossesLeft || crossesRight) return prev

        const next = [...prev]
        next[dragging] = xPct
        return next
      })
    }

    const handleMouseUp = () => {
      setDragging(null)
    }

    window.addEventListener('mousemove', handleMouseMove)
    window.addEventListener('mouseup', handleMouseUp)
    return () => {
      window.removeEventListener('mousemove', handleMouseMove)
      window.removeEventListener('mouseup', handleMouseUp)
    }
  }, [dragging, getXPercent, canPlaceDivider])

  const removeDivider = useCallback((index: number) => {
    setDividers(prev => prev.filter((_, i) => i !== index))
    // Removing divider `index` merges columns `index` and `index + 1`. Keep
    // the left column's type and drop the right one, so the types of all
    // columns further right stay attached to their columns (instead of the
    // last column's type being discarded by the length-sync effect).
    setColumnTypes(prev => prev.filter((_, i) => i !== index + 1))
  }, [])

  const updateColumnType = useCallback((colIndex: number, type: ColumnTypeKey) => {
    setColumnTypes(prev => {
      const next = [...prev]
      next[colIndex] = type
      return next
    })
  }, [])

  const handleApply = useCallback(() => {
    // Build PageRegion array from dividers
    const sorted = [...dividers].sort((a, b) => a - b)
    const columns: PageRegion[] = []

    // Round the pixel position of each boundary once and derive widths from
    // the rounded boundaries; rounding x and width independently can leave a
    // 1px gap or overlap between neighbouring columns.
    const toPixel = (pct: number) => Math.round((pct / 100) * imageWidth)

    for (let i = 0; i <= sorted.length; i++) {
      const leftPct = i === 0 ? 0 : sorted[i - 1]
      const rightPct = i === sorted.length ? 100 : sorted[i]
      const x = toPixel(leftPct)
      const w = toPixel(rightPct) - x

      columns.push({
        type: columnTypes[i] || 'column_text',
        x,
        y: 0,
        width: w,
        height: imageHeight,
        classification_confidence: 1.0,
        classification_method: 'manual',
      })
    }

    onApply(columns)
  }, [dividers, columnTypes, imageWidth, imageHeight, onApply])

  // Compute column regions for overlay
  const sorted = [...dividers].sort((a, b) => a - b)
  const columnRegions = Array.from({ length: sorted.length + 1 }, (_, i) => ({
    leftPct: i === 0 ? 0 : sorted[i - 1],
    rightPct: i === sorted.length ? 100 : sorted[i],
    type: columnTypes[i] || 'column_text',
  }))

  return (
    <div className="space-y-4">
      {/* Layout: image + controls */}
      <div className={layout === 'stacked' ? 'space-y-4' : 'grid grid-cols-2 gap-4'}>
        {/* Left: Interactive image */}
        <div>
          <div className="flex items-center justify-between mb-1">
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400">
              Klicken um Trennlinien zu setzen
            </div>
            <button
              onClick={onCancel}
              className="text-xs px-2 py-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200"
            >
              Abbrechen
            </button>
          </div>
          <div
            ref={containerRef}
            className="relative border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 cursor-crosshair select-none"
            onClick={handleImageClick}
          >
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              src={imageUrl}
              alt="Entzerrtes Bild"
              className="w-full h-auto block"
              draggable={false}
              onLoad={() => setImageLoaded(true)}
            />

            {imageLoaded && (
              <>
                {/* Column overlays */}
                {columnRegions.map((region, i) => (
                  <div
                    key={`col-${i}`}
                    className="absolute top-0 bottom-0 pointer-events-none"
                    style={{
                      left: `${region.leftPct}%`,
                      width: `${region.rightPct - region.leftPct}%`,
                      backgroundColor: TYPE_OVERLAY_COLORS[region.type] || 'rgba(128,128,128,0.08)',
                    }}
                  >
                    <span className="absolute top-1 left-1/2 -translate-x-1/2 text-[10px] font-medium text-gray-600 dark:text-gray-300 bg-white/80 dark:bg-gray-800/80 px-1 rounded">
                      {i + 1}
                    </span>
                  </div>
                ))}

                {/* Divider lines */}
                {sorted.map((xPct, i) => (
                  <div
                    key={`div-${i}`}
                    data-divider="true"
                    className="absolute top-0 bottom-0 group"
                    style={{
                      left: `${xPct}%`,
                      transform: 'translateX(-50%)',
                      width: '12px',
                      cursor: 'col-resize',
                      zIndex: 10,
                    }}
                    onMouseDown={(e) => handleDividerMouseDown(e, i)}
                  >
                    {/* Visible line */}
                    <div
                      data-divider="true"
                      className="absolute top-0 bottom-0 left-1/2 -translate-x-1/2 w-0.5 border-l-2 border-dashed border-red-500"
                    />
                    {/* Delete button */}
                    <button
                      data-divider="true"
                      onClick={(e) => {
                        e.stopPropagation()
                        removeDivider(i)
                      }}
                      className="absolute top-2 left-1/2 -translate-x-1/2 w-4 h-4 bg-red-500 text-white rounded-full text-[10px] leading-none flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity z-20"
                      title="Linie entfernen"
                    >
                      x
                    </button>
                  </div>
                ))}
              </>
            )}
          </div>
        </div>

        {/* Right: Column type assignment + actions */}
        <div className="space-y-4">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
            Spaltentypen
          </div>

          {dividers.length === 0 ? (
            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-6 text-center">
              <div className="text-3xl mb-2">👆</div>
              <p className="text-sm text-gray-500 dark:text-gray-400">
                Klicken Sie auf das Bild links, um vertikale Trennlinien zwischen den Spalten zu setzen.
              </p>
              <p className="text-xs text-gray-400 dark:text-gray-500 mt-2">
                Linien koennen per Drag verschoben und per Hover geloescht werden.
              </p>
            </div>
          ) : (
            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
              <div className="text-sm text-gray-600 dark:text-gray-400">
                <span className="font-medium text-gray-800 dark:text-gray-200">
                  {dividers.length} Linien = {dividers.length + 1} Spalten
                </span>
              </div>
              <div className="grid gap-2">
                {columnRegions.map((region, i) => (
                  <div key={i} className="flex items-center gap-3">
                    <span className={`w-16 text-center px-2 py-0.5 rounded text-xs font-medium ${TYPE_BADGE_COLORS[region.type] || 'bg-gray-100 text-gray-600'}`}>
                      Spalte {i + 1}
                    </span>
                    <select
                      value={columnTypes[i] || 'column_text'}
                      onChange={(e) => updateColumnType(i, e.target.value as ColumnTypeKey)}
                      className="text-sm border border-gray-200 dark:border-gray-600 rounded px-2 py-1 bg-white dark:bg-gray-700 text-gray-800 dark:text-gray-200"
                    >
                      {COLUMN_TYPES.map(t => (
                        <option key={t.value} value={t.value}>{t.label}</option>
                      ))}
                    </select>
                    <span className="text-xs text-gray-400 font-mono">
                      {Math.round(region.rightPct - region.leftPct)}%
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}

          {/* Action buttons */}
          <div className="flex flex-col gap-2">
            <button
              onClick={handleApply}
              disabled={dividers.length === 0 || applying}
              className="w-full px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed"
            >
              {applying
                ? 'Wird gespeichert...'
                : isGT
                  ? `${dividers.length + 1} Spalten als Ground Truth speichern`
                  : `${dividers.length + 1} Spalten uebernehmen`}
            </button>
            <button
              onClick={() => setDividers([])}
              disabled={dividers.length === 0}
              className="text-xs px-3 py-2 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 disabled:opacity-50"
            >
              Alle Linien entfernen
            </button>
          </div>
        </div>
      </div>
    </div>
  )
}
|
||||||
53
admin-lehrer/components/ocr-pipeline/PipelineStepper.tsx
Normal file
53
admin-lehrer/components/ocr-pipeline/PipelineStepper.tsx
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { PipelineStep } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Props of the horizontal step indicator of the OCR pipeline. */
interface PipelineStepperProps {
  /** Steps in display order, left to right. */
  steps: PipelineStep[]
  /** Zero-based index of the step that is currently active. */
  currentStep: number
  /** Called with the step index when the user selects a reachable step. */
  onStepClick: (index: number) => void
}
|
||||||
|
|
||||||
|
/**
 * Renders the pipeline steps as a row of pill buttons joined by short
 * connector bars.
 *
 * A step can be clicked when it lies at or before the current step, or when
 * its status is 'completed'; all other steps are rendered disabled. The
 * active step takes visual precedence over the completed/failed colouring,
 * while the icon still reflects completed/failed status.
 */
export function PipelineStepper({ steps, currentStep, onStepClick }: PipelineStepperProps) {
  // Colour classes of a pill; precedence: active > completed > failed > idle.
  const toneFor = (active: boolean, completed: boolean, failed: boolean): string => {
    if (active) {
      return 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
    }
    if (completed) {
      return 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
    }
    if (failed) {
      return 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
    }
    return 'text-gray-400 dark:text-gray-500'
  }

  return (
    <div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
      {steps.map((step, index) => {
        const completed = step.status === 'completed'
        const failed = step.status === 'failed'
        const reached = index <= currentStep
        const selectable = reached || completed

        const connectorTone = reached ? 'bg-teal-400' : 'bg-gray-300 dark:bg-gray-600'
        const pillTone = toneFor(index === currentStep, completed, failed)
        const cursorTone = selectable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'

        let glyph = step.icon
        if (completed) {
          glyph = '✓'
        } else if (failed) {
          glyph = '✗'
        }

        const select = () => {
          if (selectable) onStepClick(index)
        }

        return (
          <div key={step.id} className="flex items-center">
            {index > 0 && (
              <div className={`h-0.5 w-8 mx-1 ${connectorTone}`} />
            )}
            <button
              onClick={select}
              disabled={!selectable}
              className={`flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all ${pillTone} ${cursorTone}`}
            >
              <span className="text-base">
                {glyph}
              </span>
              <span className="hidden sm:inline">{step.name}</span>
              <span className="sm:hidden">{index + 1}</span>
            </button>
          </div>
        )
      })}
    </div>
  )
}
|
||||||
341
admin-lehrer/components/ocr-pipeline/StepColumnDetection.tsx
Normal file
341
admin-lehrer/components/ocr-pipeline/StepColumnDetection.tsx
Normal file
@@ -0,0 +1,341 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useState } from 'react'
|
||||||
|
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
import { ColumnControls } from './ColumnControls'
|
||||||
|
import { ManualColumnEditor } from './ManualColumnEditor'
|
||||||
|
import type { ColumnTypeKey } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Path prefix under which all OCR pipeline requests of this step are issued. */
const KLAUSUR_API = '/klausur-api'

/**
 * What the step currently shows: the detection result ('normal'), the
 * ground-truth editor next to the auto result ('ground-truth'), or the
 * manual column editor ('manual').
 */
type ViewMode = 'normal' | 'ground-truth' | 'manual'

/** Props of the column detection step. */
interface StepColumnDetectionProps {
  /** Pipeline session to work on; while null the step only shows a hint. */
  sessionId: string | null
  /** Advances the pipeline to the following step. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Derive the ManualColumnEditor start state from a list of column regions.
 *
 * Columns are ordered by their left edge. Every column except the leftmost
 * contributes one divider at its left edge, expressed as a percentage of
 * `imageWidth`; the column types are returned in the same left-to-right
 * order. An empty column list or a falsy `imageWidth` yields an empty state.
 */
function columnsToEditorState(
  columns: PageRegion[],
  imageWidth: number
): { dividers: number[]; columnTypes: ColumnTypeKey[] } {
  if (columns.length === 0 || !imageWidth) {
    return { dividers: [], columnTypes: [] }
  }

  const leftToRight = columns.slice().sort((lhs, rhs) => lhs.x - rhs.x)

  return {
    // The leftmost column starts at the page edge and needs no divider.
    dividers: leftToRight.slice(1).map(col => (col.x / imageWidth) * 100),
    columnTypes: leftToRight.map(col => col.type),
  }
}
|
||||||
|
|
||||||
|
export function StepColumnDetection({ sessionId, onNext }: StepColumnDetectionProps) {
|
||||||
|
const [columnResult, setColumnResult] = useState<ColumnResult | null>(null)
|
||||||
|
const [detecting, setDetecting] = useState(false)
|
||||||
|
const [error, setError] = useState<string | null>(null)
|
||||||
|
const [viewMode, setViewMode] = useState<ViewMode>('normal')
|
||||||
|
const [applying, setApplying] = useState(false)
|
||||||
|
const [imageDimensions, setImageDimensions] = useState<{ width: number; height: number } | null>(null)
|
||||||
|
const [savedGtColumns, setSavedGtColumns] = useState<PageRegion[] | null>(null)
|
||||||
|
|
||||||
|
// Fetch session info (image dimensions) + check for cached column result
|
||||||
|
useEffect(() => {
|
||||||
|
if (!sessionId || imageDimensions) return
|
||||||
|
|
||||||
|
const fetchSessionInfo = async () => {
|
||||||
|
try {
|
||||||
|
const infoRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||||
|
if (infoRes.ok) {
|
||||||
|
const info = await infoRes.json()
|
||||||
|
if (info.image_width && info.image_height) {
|
||||||
|
setImageDimensions({ width: info.image_width, height: info.image_height })
|
||||||
|
}
|
||||||
|
if (info.column_result) {
|
||||||
|
setColumnResult(info.column_result)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to fetch session info:', e)
|
||||||
|
}
|
||||||
|
|
||||||
|
// No cached result - run auto-detection
|
||||||
|
runAutoDetection()
|
||||||
|
}
|
||||||
|
|
||||||
|
fetchSessionInfo()
|
||||||
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||||
|
}, [sessionId])
|
||||||
|
|
||||||
|
// Load saved GT if exists
|
||||||
|
useEffect(() => {
|
||||||
|
if (!sessionId) return
|
||||||
|
const fetchGt = async () => {
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`)
|
||||||
|
if (res.ok) {
|
||||||
|
const data = await res.json()
|
||||||
|
const corrected = data.columns_gt?.corrected_columns
|
||||||
|
if (corrected) setSavedGtColumns(corrected)
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// No saved GT - that's fine
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fetchGt()
|
||||||
|
}, [sessionId])
|
||||||
|
|
||||||
|
const runAutoDetection = useCallback(async () => {
|
||||||
|
if (!sessionId) return
|
||||||
|
setDetecting(true)
|
||||||
|
setError(null)
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, {
|
||||||
|
method: 'POST',
|
||||||
|
})
|
||||||
|
if (!res.ok) {
|
||||||
|
const err = await res.json().catch(() => ({ detail: res.statusText }))
|
||||||
|
throw new Error(err.detail || 'Spaltenerkennung fehlgeschlagen')
|
||||||
|
}
|
||||||
|
const data: ColumnResult = await res.json()
|
||||||
|
setColumnResult(data)
|
||||||
|
} catch (e) {
|
||||||
|
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||||
|
} finally {
|
||||||
|
setDetecting(false)
|
||||||
|
}
|
||||||
|
}, [sessionId])
|
||||||
|
|
||||||
|
const handleRerun = useCallback(() => {
|
||||||
|
runAutoDetection()
|
||||||
|
}, [runAutoDetection])
|
||||||
|
|
||||||
|
const handleGroundTruth = useCallback(async (gt: ColumnGroundTruth) => {
|
||||||
|
if (!sessionId) return
|
||||||
|
try {
|
||||||
|
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(gt),
|
||||||
|
})
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Ground truth save failed:', e)
|
||||||
|
}
|
||||||
|
}, [sessionId])
|
||||||
|
|
||||||
|
const handleManualApply = useCallback(async (columns: PageRegion[]) => {
|
||||||
|
if (!sessionId) return
|
||||||
|
setApplying(true)
|
||||||
|
setError(null)
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns/manual`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ columns }),
|
||||||
|
})
|
||||||
|
if (!res.ok) {
|
||||||
|
const err = await res.json().catch(() => ({ detail: res.statusText }))
|
||||||
|
throw new Error(err.detail || 'Manuelle Spalten konnten nicht gespeichert werden')
|
||||||
|
}
|
||||||
|
const data = await res.json()
|
||||||
|
setColumnResult({
|
||||||
|
columns: data.columns,
|
||||||
|
duration_seconds: data.duration_seconds ?? 0,
|
||||||
|
})
|
||||||
|
setViewMode('normal')
|
||||||
|
} catch (e) {
|
||||||
|
setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
|
||||||
|
} finally {
|
||||||
|
setApplying(false)
|
||||||
|
}
|
||||||
|
}, [sessionId])
|
||||||
|
|
||||||
|
const handleGtApply = useCallback(async (columns: PageRegion[]) => {
|
||||||
|
if (!sessionId) return
|
||||||
|
setApplying(true)
|
||||||
|
setError(null)
|
||||||
|
try {
|
||||||
|
const gt: ColumnGroundTruth = {
|
||||||
|
is_correct: false,
|
||||||
|
corrected_columns: columns,
|
||||||
|
}
|
||||||
|
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(gt),
|
||||||
|
})
|
||||||
|
setSavedGtColumns(columns)
|
||||||
|
setViewMode('normal')
|
||||||
|
} catch (e) {
|
||||||
|
setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
|
||||||
|
} finally {
|
||||||
|
setApplying(false)
|
||||||
|
}
|
||||||
|
}, [sessionId])
|
||||||
|
|
||||||
|
if (!sessionId) {
|
||||||
|
return (
|
||||||
|
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||||
|
<div className="text-5xl mb-4">📊</div>
|
||||||
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||||
|
Schritt 3: Spaltenerkennung
|
||||||
|
</h3>
|
||||||
|
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||||
|
Bitte zuerst Schritt 1 und 2 abschliessen.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||||
|
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/columns-overlay`
|
||||||
|
|
||||||
|
// Pre-compute editor state from saved GT or auto columns for GT mode
|
||||||
|
const gtInitial = savedGtColumns
|
||||||
|
? columnsToEditorState(savedGtColumns, imageDimensions?.width ?? 1000)
|
||||||
|
: undefined
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="space-y-4">
|
||||||
|
{/* Loading indicator */}
|
||||||
|
{detecting && (
|
||||||
|
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||||
|
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||||
|
Spaltenerkennung laeuft...
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{viewMode === 'manual' ? (
|
||||||
|
/* Manual column editor - overwrites column_result */
|
||||||
|
<ManualColumnEditor
|
||||||
|
imageUrl={dewarpedUrl}
|
||||||
|
imageWidth={imageDimensions?.width ?? 1000}
|
||||||
|
imageHeight={imageDimensions?.height ?? 1400}
|
||||||
|
onApply={handleManualApply}
|
||||||
|
onCancel={() => setViewMode('normal')}
|
||||||
|
applying={applying}
|
||||||
|
mode="manual"
|
||||||
|
/>
|
||||||
|
) : viewMode === 'ground-truth' ? (
|
||||||
|
/* GT mode: auto result (left, readonly) + GT editor (right) */
|
||||||
|
<div className="grid grid-cols-2 gap-4">
|
||||||
|
{/* Left: Auto result (readonly overlay) */}
|
||||||
|
<div>
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Auto-Ergebnis (readonly)
|
||||||
|
</div>
|
||||||
|
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||||
|
{columnResult ? (
|
||||||
|
// eslint-disable-next-line @next/next/no-img-element
|
||||||
|
<img
|
||||||
|
src={`${overlayUrl}?t=${Date.now()}`}
|
||||||
|
alt="Auto Spalten-Overlay"
|
||||||
|
className="w-full h-auto"
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||||
|
Keine Auto-Daten
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{/* Auto column list */}
|
||||||
|
{columnResult && (
|
||||||
|
<div className="mt-2 space-y-1">
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400">
|
||||||
|
Auto: {columnResult.columns.length} Spalten
|
||||||
|
</div>
|
||||||
|
{columnResult.columns
|
||||||
|
.filter(c => c.type.startsWith('column') || c.type === 'page_ref')
|
||||||
|
.map((col, i) => (
|
||||||
|
<div key={i} className="text-xs text-gray-500 dark:text-gray-400 font-mono">
|
||||||
|
{i + 1}. {col.type} x={col.x} w={col.width}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Right: GT editor */}
|
||||||
|
<div>
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Ground Truth Editor
|
||||||
|
</div>
|
||||||
|
<ManualColumnEditor
|
||||||
|
imageUrl={dewarpedUrl}
|
||||||
|
imageWidth={imageDimensions?.width ?? 1000}
|
||||||
|
imageHeight={imageDimensions?.height ?? 1400}
|
||||||
|
onApply={handleGtApply}
|
||||||
|
onCancel={() => setViewMode('normal')}
|
||||||
|
applying={applying}
|
||||||
|
mode="ground-truth"
|
||||||
|
layout="stacked"
|
||||||
|
initialDividers={gtInitial?.dividers}
|
||||||
|
initialColumnTypes={gtInitial?.columnTypes}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
/* Normal mode: overlay (left) vs clean (right) */
|
||||||
|
<div className="grid grid-cols-2 gap-4">
|
||||||
|
<div>
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Mit Spalten-Overlay
|
||||||
|
</div>
|
||||||
|
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||||
|
{columnResult ? (
|
||||||
|
// eslint-disable-next-line @next/next/no-img-element
|
||||||
|
<img
|
||||||
|
src={`${overlayUrl}?t=${Date.now()}`}
|
||||||
|
alt="Spalten-Overlay"
|
||||||
|
className="w-full h-auto"
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||||
|
{detecting ? 'Erkenne Spalten...' : 'Keine Daten'}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Entzerrtes Bild
|
||||||
|
</div>
|
||||||
|
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||||
|
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||||
|
<img
|
||||||
|
src={dewarpedUrl}
|
||||||
|
alt="Entzerrt"
|
||||||
|
className="w-full h-auto"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Controls */}
|
||||||
|
{viewMode === 'normal' && (
|
||||||
|
<ColumnControls
|
||||||
|
columnResult={columnResult}
|
||||||
|
onRerun={handleRerun}
|
||||||
|
onManualMode={() => setViewMode('manual')}
|
||||||
|
onGtMode={() => setViewMode('ground-truth')}
|
||||||
|
onGroundTruth={handleGroundTruth}
|
||||||
|
onNext={onNext}
|
||||||
|
isDetecting={detecting}
|
||||||
|
savedGtColumns={savedGtColumns}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{error && (
|
||||||
|
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
|
||||||
|
{error}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
19
admin-lehrer/components/ocr-pipeline/StepCoordinates.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepCoordinates.tsx
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
/**
 * Placeholder panel for the coordinate-assignment step.
 *
 * Renders a static "coming soon" notice. Takes no props and holds no state.
 */
export function StepCoordinates() {
  // Static badge rendered underneath the description text.
  const comingSoonBadge = (
    <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
      Kommt bald
    </div>
  )

  return (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      <div className="text-5xl mb-4">📍</div>
      <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
        Schritt 5: Koordinatenzuweisung
      </h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">
        Exakte Positionszuweisung fuer jedes Wort auf der Seite.
        Dieser Schritt wird in einer zukuenftigen Version implementiert.
      </p>
      {comingSoonBadge}
    </div>
  )
}
|
||||||
277
admin-lehrer/components/ocr-pipeline/StepDeskew.tsx
Normal file
277
admin-lehrer/components/ocr-pipeline/StepDeskew.tsx
Normal file
@@ -0,0 +1,277 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useState } from 'react'
|
||||||
|
import type { DeskewGroundTruth, DeskewResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
import { DeskewControls } from './DeskewControls'
|
||||||
|
import { ImageCompareView } from './ImageCompareView'
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/** Props accepted by the StepDeskew component. */
interface StepDeskewProps {
  /** Id of an already existing session; may be omitted or null when none exists yet. */
  sessionId?: string | null
  /** Callback invoked with a session id when the user proceeds to the next step. */
  onNext: (sessionId: string) => void
}
|
||||||
|
|
||||||
|
/**
 * Upload + deskew step of the OCR pipeline UI.
 *
 * Without a loaded session it renders an upload area (drag & drop or file
 * picker) with an optional session name. After a successful upload the
 * automatic deskew is triggered and the original/deskewed images are shown
 * side by side together with the deskew controls.
 *
 * @param sessionId Optional id of an existing session; when given and no
 *   session is loaded yet, the session data is fetched from the API.
 * @param onNext Called with the active session id when the user continues.
 */
export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
  const [uploading, setUploading] = useState(false)
  const [deskewing, setDeskewing] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showBinarized, setShowBinarized] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [dragOver, setDragOver] = useState(false)
  const [sessionName, setSessionName] = useState('')

  // Reload session data when navigating back from a later step
  useEffect(() => {
    if (!existingSessionId || session) return

    // Flipped by the cleanup so a response that arrives after unmount, or
    // after a session was set by other means, cannot overwrite newer state.
    let cancelled = false

    const loadSession = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}`)
        if (!res.ok || cancelled) return
        const data = await res.json()
        if (cancelled) return

        const sessionInfo: SessionInfo = {
          session_id: data.session_id,
          filename: data.filename,
          image_width: data.image_width,
          image_height: data.image_height,
          original_image_url: `${KLAUSUR_API}${data.original_image_url}`,
        }
        setSession(sessionInfo)

        // Reconstruct deskew result from session data
        if (data.deskew_result) {
          const dr: DeskewResult = {
            ...data.deskew_result,
            deskewed_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}/image/deskewed`,
            binarized_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}/image/binarized`,
          }
          setDeskewResult(dr)
        }
      } catch (e) {
        console.error('Failed to reload session:', e)
      }
    }

    loadSession()

    return () => {
      cancelled = true
    }
  }, [existingSessionId, session])

  // Creates a new session from the given file and immediately runs the automatic deskew.
  const handleUpload = useCallback(async (file: File) => {
    setUploading(true)
    setError(null)
    setDeskewResult(null)

    try {
      const formData = new FormData()
      formData.append('file', file)
      if (sessionName.trim()) {
        formData.append('name', sessionName.trim())
      }

      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
        method: 'POST',
        body: formData,
      })

      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Upload fehlgeschlagen')
      }

      const data: SessionInfo = await res.json()
      // Prepend API prefix to relative URLs
      data.original_image_url = `${KLAUSUR_API}${data.original_image_url}`
      setSession(data)

      // Auto-trigger deskew
      setDeskewing(true)
      const deskewRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/deskew`, {
        method: 'POST',
      })

      if (!deskewRes.ok) {
        throw new Error('Begradigung fehlgeschlagen')
      }

      const deskewData: DeskewResult = await deskewRes.json()
      deskewData.deskewed_image_url = `${KLAUSUR_API}${deskewData.deskewed_image_url}`
      deskewData.binarized_image_url = `${KLAUSUR_API}${deskewData.binarized_image_url}`
      setDeskewResult(deskewData)
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setUploading(false)
      setDeskewing(false)
    }
    // sessionName must be a dependency: with an empty array the callback kept
    // the initial '' forever and the typed session name was never submitted.
  }, [sessionName])

  // Applies a user-chosen rotation angle and refreshes the deskewed image.
  const handleManualDeskew = useCallback(async (angle: number) => {
    if (!session) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/deskew/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ angle }),
      })

      if (!res.ok) throw new Error('Manuelle Begradigung fehlgeschlagen')

      const data = await res.json()
      setDeskewResult((prev) =>
        prev
          ? {
              ...prev,
              angle_applied: data.angle_applied,
              method_used: data.method_used,
              // Force reload by appending timestamp
              deskewed_image_url: `${KLAUSUR_API}${data.deskewed_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [session])

  // Best-effort save of the ground-truth feedback; failures are only logged.
  const handleGroundTruth = useCallback(async (gt: DeskewGroundTruth) => {
    if (!session) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/ground-truth/deskew`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch() only rejects on network errors, so HTTP failures have to be
      // detected explicitly or they would go completely unnoticed.
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [session])

  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault()
    setDragOver(false)
    const file = e.dataTransfer.files[0]
    if (file) handleUpload(file)
  }, [handleUpload])

  const handleFileInput = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0]
    // Clear the input so choosing the same file again (e.g. after a failed
    // upload) still fires a change event.
    e.target.value = ''
    if (file) handleUpload(file)
  }, [handleUpload])

  // Upload area (no session yet)
  if (!session) {
    return (
      <div className="space-y-4">
        {/* Session name input */}
        <div>
          <label className="block text-sm font-medium text-gray-600 dark:text-gray-400 mb-1">
            Session-Name (optional)
          </label>
          <input
            type="text"
            value={sessionName}
            onChange={(e) => setSessionName(e.target.value)}
            placeholder="z.B. Unit 3 Seite 42"
            className="w-full max-w-sm px-3 py-2 text-sm border rounded-lg dark:bg-gray-800 dark:border-gray-600 dark:text-gray-200 focus:outline-none focus:ring-2 focus:ring-teal-500"
          />
        </div>

        <div
          onDragOver={(e) => { e.preventDefault(); setDragOver(true) }}
          onDragLeave={() => setDragOver(false)}
          onDrop={handleDrop}
          className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
            dragOver
              ? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
              : 'border-gray-300 dark:border-gray-600 hover:border-teal-400'
          }`}
        >
          {uploading ? (
            <div className="text-gray-500">
              <div className="animate-spin inline-block w-8 h-8 border-2 border-teal-500 border-t-transparent rounded-full mb-3" />
              <p>Wird hochgeladen...</p>
            </div>
          ) : (
            <>
              <div className="text-4xl mb-3">📄</div>
              <p className="text-gray-600 dark:text-gray-400 mb-2">
                PDF oder Bild hierher ziehen
              </p>
              <p className="text-sm text-gray-400 mb-4">oder</p>
              <label className="inline-block px-4 py-2 bg-teal-600 text-white rounded-lg cursor-pointer hover:bg-teal-700 transition-colors">
                Datei auswaehlen
                <input
                  type="file"
                  accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
                  onChange={handleFileInput}
                  className="hidden"
                />
              </label>
            </>
          )}
        </div>

        {error && (
          <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
            {error}
          </div>
        )}
      </div>
    )
  }

  // Session active: show comparison + controls
  return (
    <div className="space-y-4">
      {/* Filename */}
      <div className="text-sm text-gray-500 dark:text-gray-400">
        Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
        {' '}({session.image_width} x {session.image_height} px)
      </div>

      {/* Loading indicator */}
      {deskewing && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Begradigung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison */}
      <ImageCompareView
        originalUrl={session.original_image_url}
        deskewedUrl={deskewResult?.deskewed_image_url ?? null}
        showGrid={showGrid}
        showBinarized={showBinarized}
        binarizedUrl={deskewResult?.binarized_image_url ?? null}
      />

      {/* Controls */}
      <DeskewControls
        deskewResult={deskewResult}
        showBinarized={showBinarized}
        onToggleBinarized={() => setShowBinarized((v) => !v)}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDeskew={handleManualDeskew}
        onGroundTruth={handleGroundTruth}
        onNext={() => session && onNext(session.session_id)}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||||
151
admin-lehrer/components/ocr-pipeline/StepDewarp.tsx
Normal file
151
admin-lehrer/components/ocr-pipeline/StepDewarp.tsx
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useState } from 'react'
|
||||||
|
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
import { DewarpControls } from './DewarpControls'
|
||||||
|
import { ImageCompareView } from './ImageCompareView'
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/** Props accepted by the StepDewarp component. */
interface StepDewarpProps {
  /** Id of the session to dewarp, or null when no session exists yet. */
  sessionId: string | null
  /** Callback invoked when the user proceeds to the next step. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Dewarp step of the OCR pipeline UI.
 *
 * As soon as a session id is available and no result is loaded, the
 * automatic dewarp is requested from the API. The deskewed image (left) is
 * then compared with the dewarped image (right), and the controls allow a
 * manual shear correction and ground-truth feedback.
 *
 * @param sessionId Id of the session to work on; null renders a hint to
 *   finish the deskew step first.
 * @param onNext Called when the user continues to the next step.
 */
export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
  const [dewarpResult, setDewarpResult] = useState<DewarpResult | null>(null)
  const [dewarping, setDewarping] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)

  // Auto-trigger dewarp when component mounts with a sessionId
  useEffect(() => {
    if (!sessionId || dewarpResult) return

    // Flipped by the cleanup so a response for an outdated sessionId, or one
    // arriving after unmount, can no longer write into component state.
    let cancelled = false

    const runDewarp = async () => {
      setDewarping(true)
      setError(null)
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp`, {
          method: 'POST',
        })
        if (!res.ok) {
          const err = await res.json().catch(() => ({ detail: res.statusText }))
          throw new Error(err.detail || 'Entzerrung fehlgeschlagen')
        }
        const data: DewarpResult = await res.json()
        if (cancelled) return
        data.dewarped_image_url = `${KLAUSUR_API}${data.dewarped_image_url}`
        setDewarpResult(data)
      } catch (e) {
        if (!cancelled) {
          setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
        }
      } finally {
        // A cancelled run must not clear the spinner of the run that replaced it.
        if (!cancelled) {
          setDewarping(false)
        }
      }
    }

    runDewarp()

    return () => {
      cancelled = true
    }
  }, [sessionId, dewarpResult])

  // Applies a user-chosen shear angle and refreshes the dewarped image.
  const handleManualDewarp = useCallback(async (shearDegrees: number) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ shear_degrees: shearDegrees }),
      })
      if (!res.ok) throw new Error('Manuelle Entzerrung fehlgeschlagen')

      const data = await res.json()
      setDewarpResult((prev) =>
        prev
          ? {
              ...prev,
              method_used: data.method_used,
              shear_degrees: data.shear_degrees,
              // Timestamp query parameter forces the browser to reload the image
              dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  // Best-effort save of the ground-truth feedback; failures are only logged.
  const handleGroundTruth = useCallback(async (gt: DewarpGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/dewarp`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch() only rejects on network errors, so HTTP failures have to be
      // detected explicitly or they would go completely unnoticed.
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">🔧</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 2: Entzerrung (Dewarp)
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 (Begradigung) abschliessen.
        </p>
      </div>
    )
  }

  const deskewedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/deskewed`
  const dewarpedUrl = dewarpResult?.dewarped_image_url ?? null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {dewarping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Entzerrung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison: deskewed (left) vs dewarped (right) */}
      <ImageCompareView
        originalUrl={deskewedUrl}
        deskewedUrl={dewarpedUrl}
        showGrid={showGrid}
        showGridLeft={showGrid}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel={`Begradigt (nach Deskew)${showGrid ? ' + Raster' : ''}`}
        rightLabel={`Entzerrt${showGrid ? ' + Raster (mm)' : ''}`}
      />

      {/* Controls */}
      <DewarpControls
        dewarpResult={dewarpResult}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDewarp={handleManualDewarp}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||||
19
admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
/**
 * Placeholder panel for the ground-truth validation step.
 *
 * Renders a static "coming soon" notice. Takes no props and holds no state.
 */
export function StepGroundTruth() {
  // Heading and description are assembled up front; the markup is static.
  const heading = (
    <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
      Schritt 7: Ground Truth Validierung
    </h3>
  )
  const description = (
    <p className="text-gray-500 dark:text-gray-400 max-w-md">
      Gesamtpruefung der rekonstruierten Seite gegen das Original.
      Dieser Schritt wird in einer zukuenftigen Version implementiert.
    </p>
  )

  return (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      <div className="text-5xl mb-4">✅</div>
      {heading}
      {description}
      <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
        Kommt bald
      </div>
    </div>
  )
}
|
||||||
19
admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
/**
 * Placeholder panel for the page-reconstruction step.
 *
 * Renders a static "coming soon" notice. Takes no props and holds no state.
 */
export function StepReconstruction() {
  // The whole panel is static markup, built once and returned as-is.
  const panel = (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      <div className="text-5xl mb-4">🏗️</div>
      <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
        Schritt 6: Seitenrekonstruktion
      </h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">
        Nachbau der Originalseite aus erkannten Woertern und Positionen.
        Dieser Schritt wird in einer zukuenftigen Version implementiert.
      </p>
      <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
        Kommt bald
      </div>
    </div>
  )

  return panel
}
|
||||||
19
admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
/**
 * Placeholder panel for the word-recognition step.
 *
 * Renders a static "coming soon" notice. Takes no props and holds no state.
 */
export function StepWordRecognition() {
  // Icon and badge are pulled out so the returned tree reads top to bottom.
  const icon = <div className="text-5xl mb-4">🔤</div>
  const badge = (
    <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
      Kommt bald
    </div>
  )

  return (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      {icon}
      <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
        Schritt 4: Worterkennung
      </h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">
        OCR mit Bounding Boxes fuer jedes erkannte Wort.
        Dieser Schritt wird in einer zukuenftigen Version implementiert.
      </p>
      {badge}
    </div>
  )
}
|
||||||
@@ -127,6 +127,15 @@ export const navigation: NavCategory[] = [
|
|||||||
audience: ['Entwickler', 'Data Scientists', 'Lehrer'],
|
audience: ['Entwickler', 'Data Scientists', 'Lehrer'],
|
||||||
subgroup: 'KI-Werkzeuge',
|
subgroup: 'KI-Werkzeuge',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
id: 'ocr-pipeline',
|
||||||
|
name: 'OCR Pipeline',
|
||||||
|
href: '/ai/ocr-pipeline',
|
||||||
|
description: 'Schrittweise Seitenrekonstruktion',
|
||||||
|
purpose: 'Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. 6-Schritt-Pipeline mit Ground Truth Validierung.',
|
||||||
|
audience: ['Entwickler', 'Data Scientists'],
|
||||||
|
subgroup: 'KI-Werkzeuge',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
id: 'test-quality',
|
id: 'test-quality',
|
||||||
name: 'Test Quality (BQAS)',
|
name: 'Test Quality (BQAS)',
|
||||||
|
|||||||
@@ -1,323 +0,0 @@
|
|||||||
# =========================================================
|
|
||||||
# BreakPilot Lehrer — KI-Lehrerplattform (Coolify)
|
|
||||||
# =========================================================
|
|
||||||
# Requires: breakpilot-core must be running
|
|
||||||
# Deployed via Coolify. SSL termination handled by Traefik.
|
|
||||||
# External services (managed separately in Coolify):
|
|
||||||
# - PostgreSQL, Qdrant, S3-compatible storage
|
|
||||||
# =========================================================
|
|
||||||
|
|
||||||
networks:
|
|
||||||
breakpilot-network:
|
|
||||||
external: true
|
|
||||||
name: breakpilot-network
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
klausur_uploads:
|
|
||||||
eh_uploads:
|
|
||||||
ocr_labeling:
|
|
||||||
paddle_models:
|
|
||||||
lehrer_backend_data:
|
|
||||||
opensearch_data:
|
|
||||||
|
|
||||||
services:
|
|
||||||
|
|
||||||
# =========================================================
|
|
||||||
# FRONTEND
|
|
||||||
# =========================================================
|
|
||||||
admin-lehrer:
|
|
||||||
build:
|
|
||||||
context: ./admin-lehrer
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
args:
|
|
||||||
NEXT_PUBLIC_API_URL: ${NEXT_PUBLIC_API_URL:-https://api-lehrer.breakpilot.ai}
|
|
||||||
NEXT_PUBLIC_OLD_ADMIN_URL: ${NEXT_PUBLIC_OLD_ADMIN_URL:-}
|
|
||||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
|
||||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
|
||||||
container_name: bp-lehrer-admin
|
|
||||||
expose:
|
|
||||||
- "3000"
|
|
||||||
volumes:
|
|
||||||
- lehrer_backend_data:/app/data
|
|
||||||
environment:
|
|
||||||
NODE_ENV: production
|
|
||||||
BACKEND_URL: http://backend-lehrer:8001
|
|
||||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
|
||||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
|
||||||
OLLAMA_URL: ${OLLAMA_URL:-}
|
|
||||||
depends_on:
|
|
||||||
backend-lehrer:
|
|
||||||
condition: service_started
|
|
||||||
labels:
|
|
||||||
- "traefik.enable=true"
|
|
||||||
- "traefik.http.routers.admin-lehrer.rule=Host(`admin-lehrer.breakpilot.ai`)"
|
|
||||||
- "traefik.http.routers.admin-lehrer.entrypoints=https"
|
|
||||||
- "traefik.http.routers.admin-lehrer.tls=true"
|
|
||||||
- "traefik.http.routers.admin-lehrer.tls.certresolver=letsencrypt"
|
|
||||||
- "traefik.http.services.admin-lehrer.loadbalancer.server.port=3000"
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- breakpilot-network
|
|
||||||
|
|
||||||
studio-v2:
|
|
||||||
build:
|
|
||||||
context: ./studio-v2
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
args:
|
|
||||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
|
||||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
|
||||||
container_name: bp-lehrer-studio-v2
|
|
||||||
expose:
|
|
||||||
- "3001"
|
|
||||||
environment:
|
|
||||||
NODE_ENV: production
|
|
||||||
BACKEND_URL: http://backend-lehrer:8001
|
|
||||||
depends_on:
|
|
||||||
- backend-lehrer
|
|
||||||
labels:
|
|
||||||
- "traefik.enable=true"
|
|
||||||
- "traefik.http.routers.studio.rule=Host(`app.breakpilot.ai`)"
|
|
||||||
- "traefik.http.routers.studio.entrypoints=https"
|
|
||||||
- "traefik.http.routers.studio.tls=true"
|
|
||||||
- "traefik.http.routers.studio.tls.certresolver=letsencrypt"
|
|
||||||
- "traefik.http.services.studio.loadbalancer.server.port=3001"
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- breakpilot-network
|
|
||||||
|
|
||||||
website:
|
|
||||||
build:
|
|
||||||
context: ./website
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
args:
|
|
||||||
NEXT_PUBLIC_BILLING_API_URL: ${NEXT_PUBLIC_BILLING_API_URL:-https://api-core.breakpilot.ai}
|
|
||||||
NEXT_PUBLIC_APP_URL: ${NEXT_PUBLIC_APP_URL:-https://app.breakpilot.ai}
|
|
||||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
|
||||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
|
||||||
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY: ${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY:-}
|
|
||||||
container_name: bp-lehrer-website
|
|
||||||
expose:
|
|
||||||
- "3000"
|
|
||||||
environment:
|
|
||||||
NODE_ENV: production
|
|
||||||
VAST_API_KEY: ${VAST_API_KEY:-}
|
|
||||||
CONTROL_API_KEY: ${CONTROL_API_KEY:-}
|
|
||||||
BACKEND_URL: http://backend-lehrer:8001
|
|
||||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
|
||||||
EDU_SEARCH_URL: ${EDU_SEARCH_URL:-}
|
|
||||||
EDU_SEARCH_API_KEY: ${EDU_SEARCH_API_KEY:-}
|
|
||||||
depends_on:
|
|
||||||
- backend-lehrer
|
|
||||||
labels:
|
|
||||||
- "traefik.enable=true"
|
|
||||||
- "traefik.http.routers.website.rule=Host(`www.breakpilot.ai`)"
|
|
||||||
- "traefik.http.routers.website.entrypoints=https"
|
|
||||||
- "traefik.http.routers.website.tls=true"
|
|
||||||
- "traefik.http.routers.website.tls.certresolver=letsencrypt"
|
|
||||||
- "traefik.http.services.website.loadbalancer.server.port=3000"
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- breakpilot-network
|
|
||||||
|
|
||||||
# =========================================================
|
|
||||||
# BACKEND
|
|
||||||
# =========================================================
|
|
||||||
backend-lehrer:
|
|
||||||
build:
|
|
||||||
context: ./backend-lehrer
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
container_name: bp-lehrer-backend
|
|
||||||
user: "0:0"
|
|
||||||
expose:
|
|
||||||
- "8001"
|
|
||||||
volumes:
|
|
||||||
- lehrer_backend_data:/app/data
|
|
||||||
environment:
|
|
||||||
PORT: 8001
|
|
||||||
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}?options=-csearch_path%3Dlehrer,core,public
|
|
||||||
JWT_SECRET: ${JWT_SECRET}
|
|
||||||
ENVIRONMENT: production
|
|
||||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
|
||||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
|
||||||
TROCR_SERVICE_URL: ${TROCR_SERVICE_URL:-}
|
|
||||||
CAMUNDA_URL: ${CAMUNDA_URL:-}
|
|
||||||
VALKEY_URL: redis://bp-core-valkey:6379/0
|
|
||||||
SESSION_TTL_HOURS: ${SESSION_TTL_HOURS:-24}
|
|
||||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
|
||||||
DEBUG: "false"
|
|
||||||
ALERTS_AGENT_ENABLED: ${ALERTS_AGENT_ENABLED:-false}
|
|
||||||
VAST_API_KEY: ${VAST_API_KEY:-}
|
|
||||||
VAST_INSTANCE_ID: ${VAST_INSTANCE_ID:-}
|
|
||||||
CONTROL_API_KEY: ${CONTROL_API_KEY:-}
|
|
||||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
|
||||||
OLLAMA_ENABLED: ${OLLAMA_ENABLED:-false}
|
|
||||||
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-}
|
|
||||||
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-}
|
|
||||||
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-}
|
|
||||||
OLLAMA_TIMEOUT: ${OLLAMA_TIMEOUT:-120}
|
|
||||||
GAME_USE_DATABASE: ${GAME_USE_DATABASE:-true}
|
|
||||||
GAME_REQUIRE_AUTH: ${GAME_REQUIRE_AUTH:-true}
|
|
||||||
GAME_REQUIRE_BILLING: ${GAME_REQUIRE_BILLING:-true}
|
|
||||||
GAME_LLM_MODEL: ${GAME_LLM_MODEL:-}
|
|
||||||
SMTP_HOST: ${SMTP_HOST}
|
|
||||||
SMTP_PORT: ${SMTP_PORT:-587}
|
|
||||||
SMTP_USERNAME: ${SMTP_USERNAME}
|
|
||||||
SMTP_PASSWORD: ${SMTP_PASSWORD}
|
|
||||||
SMTP_FROM_NAME: ${SMTP_FROM_NAME:-BreakPilot}
|
|
||||||
SMTP_FROM_ADDR: ${SMTP_FROM_ADDR:-noreply@breakpilot.ai}
|
|
||||||
RAG_SERVICE_URL: http://bp-core-rag-service:8097
|
|
||||||
labels:
|
|
||||||
- "traefik.enable=true"
|
|
||||||
- "traefik.http.routers.backend-lehrer.rule=Host(`api-lehrer.breakpilot.ai`)"
|
|
||||||
- "traefik.http.routers.backend-lehrer.entrypoints=https"
|
|
||||||
- "traefik.http.routers.backend-lehrer.tls=true"
|
|
||||||
- "traefik.http.routers.backend-lehrer.tls.certresolver=letsencrypt"
|
|
||||||
- "traefik.http.services.backend-lehrer.loadbalancer.server.port=8001"
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- breakpilot-network
|
|
||||||
|
|
||||||
# =========================================================
|
|
||||||
# MICROSERVICES
|
|
||||||
# =========================================================
|
|
||||||
klausur-service:
|
|
||||||
build:
|
|
||||||
context: ./klausur-service
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
container_name: bp-lehrer-klausur-service
|
|
||||||
expose:
|
|
||||||
- "8086"
|
|
||||||
volumes:
|
|
||||||
- klausur_uploads:/app/uploads
|
|
||||||
- eh_uploads:/app/eh-uploads
|
|
||||||
- ocr_labeling:/app/ocr-labeling
|
|
||||||
- paddle_models:/root/.paddlex
|
|
||||||
environment:
|
|
||||||
JWT_SECRET: ${JWT_SECRET}
|
|
||||||
BACKEND_URL: http://backend-lehrer:8001
|
|
||||||
SCHOOL_SERVICE_URL: http://school-service:8084
|
|
||||||
ENVIRONMENT: production
|
|
||||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
|
|
||||||
EMBEDDING_SERVICE_URL: http://bp-core-embedding-service:8087
|
|
||||||
QDRANT_URL: ${QDRANT_URL}
|
|
||||||
MINIO_ENDPOINT: ${S3_ENDPOINT}
|
|
||||||
MINIO_ACCESS_KEY: ${S3_ACCESS_KEY}
|
|
||||||
MINIO_SECRET_KEY: ${S3_SECRET_KEY}
|
|
||||||
MINIO_BUCKET: ${S3_BUCKET:-breakpilot-rag}
|
|
||||||
MINIO_SECURE: ${S3_SECURE:-true}
|
|
||||||
PADDLEOCR_SERVICE_URL: ${PADDLEOCR_SERVICE_URL:-}
|
|
||||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
|
||||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
|
||||||
OLLAMA_ENABLED: ${OLLAMA_ENABLED:-false}
|
|
||||||
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-}
|
|
||||||
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-}
|
|
||||||
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-}
|
|
||||||
RAG_SERVICE_URL: http://bp-core-rag-service:8097
|
|
||||||
depends_on:
|
|
||||||
school-service:
|
|
||||||
condition: service_started
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:8086/health"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 30s
|
|
||||||
retries: 3
|
|
||||||
start_period: 10s
|
|
||||||
labels:
|
|
||||||
- "traefik.enable=true"
|
|
||||||
- "traefik.http.routers.klausur.rule=Host(`klausur.breakpilot.ai`)"
|
|
||||||
- "traefik.http.routers.klausur.entrypoints=https"
|
|
||||||
- "traefik.http.routers.klausur.tls=true"
|
|
||||||
- "traefik.http.routers.klausur.tls.certresolver=letsencrypt"
|
|
||||||
- "traefik.http.services.klausur.loadbalancer.server.port=8086"
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- breakpilot-network
|
|
||||||
|
|
||||||
school-service:
|
|
||||||
build:
|
|
||||||
context: ./school-service
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
container_name: bp-lehrer-school-service
|
|
||||||
expose:
|
|
||||||
- "8084"
|
|
||||||
environment:
|
|
||||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
|
|
||||||
JWT_SECRET: ${JWT_SECRET}
|
|
||||||
PORT: 8084
|
|
||||||
ENVIRONMENT: production
|
|
||||||
ALLOWED_ORIGINS: "*"
|
|
||||||
LLM_GATEWAY_URL: http://backend-lehrer:8001/llm
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- breakpilot-network
|
|
||||||
|
|
||||||
# =========================================================
|
|
||||||
# EDU SEARCH
|
|
||||||
# =========================================================
|
|
||||||
opensearch:
|
|
||||||
image: opensearchproject/opensearch:2.11.1
|
|
||||||
container_name: bp-lehrer-opensearch
|
|
||||||
environment:
|
|
||||||
- cluster.name=edu-search-cluster
|
|
||||||
- node.name=opensearch-node1
|
|
||||||
- discovery.type=single-node
|
|
||||||
- bootstrap.memory_lock=true
|
|
||||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
|
|
||||||
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD:-Admin123!}
|
|
||||||
- plugins.security.disabled=true
|
|
||||||
ulimits:
|
|
||||||
memlock:
|
|
||||||
soft: -1
|
|
||||||
hard: -1
|
|
||||||
nofile:
|
|
||||||
soft: 65536
|
|
||||||
hard: 65536
|
|
||||||
volumes:
|
|
||||||
- opensearch_data:/usr/share/opensearch/data
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "curl -s http://localhost:9200 >/dev/null || exit 1"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 5
|
|
||||||
start_period: 60s
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- breakpilot-network
|
|
||||||
|
|
||||||
edu-search-service:
|
|
||||||
build:
|
|
||||||
context: ./edu-search-service
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
container_name: bp-lehrer-edu-search
|
|
||||||
expose:
|
|
||||||
- "8088"
|
|
||||||
environment:
|
|
||||||
PORT: 8088
|
|
||||||
OPENSEARCH_URL: http://opensearch:9200
|
|
||||||
OPENSEARCH_USERNAME: admin
|
|
||||||
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-Admin123!}
|
|
||||||
INDEX_NAME: bp_documents_v1
|
|
||||||
EDU_SEARCH_API_KEY: ${EDU_SEARCH_API_KEY:-}
|
|
||||||
USER_AGENT: "BreakpilotEduCrawler/1.0 (+contact: security@breakpilot.com)"
|
|
||||||
RATE_LIMIT_PER_SEC: "0.2"
|
|
||||||
MAX_DEPTH: "4"
|
|
||||||
MAX_PAGES_PER_RUN: "500"
|
|
||||||
DB_HOST: ${POSTGRES_HOST}
|
|
||||||
DB_PORT: ${POSTGRES_PORT:-5432}
|
|
||||||
DB_USER: ${POSTGRES_USER}
|
|
||||||
DB_PASSWORD: ${POSTGRES_PASSWORD}
|
|
||||||
DB_NAME: ${POSTGRES_DB}
|
|
||||||
DB_SSLMODE: disable
|
|
||||||
STAFF_CRAWLER_EMAIL: crawler@breakpilot.de
|
|
||||||
depends_on:
|
|
||||||
opensearch:
|
|
||||||
condition: service_healthy
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8088/v1/health"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 3s
|
|
||||||
start_period: 10s
|
|
||||||
retries: 3
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- breakpilot-network
|
|
||||||
@@ -8,24 +8,15 @@ RUN npm install
|
|||||||
COPY frontend/ ./
|
COPY frontend/ ./
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
# Production stage
|
# Production stage — uses pre-built base with Tesseract + Python deps.
|
||||||
FROM python:3.11-slim
|
# Base image contains: python:3.11-slim + tesseract-ocr + all pip packages.
|
||||||
|
# Rebuild base only when requirements.txt or system deps change:
|
||||||
|
# docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
|
||||||
|
FROM klausur-base:latest
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install system dependencies (incl. Tesseract OCR for bounding-box extraction)
|
# Copy backend code (this is the only layer that changes on code edits)
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
curl \
|
|
||||||
tesseract-ocr \
|
|
||||||
tesseract-ocr-deu \
|
|
||||||
tesseract-ocr-eng \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install Python dependencies
|
|
||||||
COPY backend/requirements.txt ./
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
|
||||||
|
|
||||||
# Copy backend code
|
|
||||||
COPY backend/ ./
|
COPY backend/ ./
|
||||||
|
|
||||||
# Copy built frontend to the expected path
|
# Copy built frontend to the expected path
|
||||||
|
|||||||
24
klausur-service/Dockerfile.base
Normal file
24
klausur-service/Dockerfile.base
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# Base image with system dependencies + Python packages.
# These change rarely — build once, reuse on every --no-cache.
#
# Rebuild manually when requirements.txt or system deps change:
#   docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
#
FROM python:3.11-slim

WORKDIR /app

# System dependencies (Tesseract OCR, curl for healthcheck).
# The apt package lists are removed in the same RUN so they never land in a layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    tesseract-ocr \
    tesseract-ocr-deu \
    tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies.
# --no-cache-dir already keeps pip's download/wheel cache out of the image, so no
# separate cleanup step is needed; a later `RUN rm` would only add another layer
# and could not shrink this one.
COPY backend/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -42,6 +42,8 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
trocr_router = None
|
trocr_router = None
|
||||||
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
|
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
|
||||||
|
from ocr_pipeline_api import router as ocr_pipeline_router
|
||||||
|
from ocr_pipeline_session_store import init_ocr_pipeline_tables
|
||||||
try:
|
try:
|
||||||
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
|
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
|
||||||
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
|
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
|
||||||
@@ -75,6 +77,13 @@ async def lifespan(app: FastAPI):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Vocab sessions database initialization failed: {e}")
|
print(f"Warning: Vocab sessions database initialization failed: {e}")
|
||||||
|
|
||||||
|
# Initialize OCR Pipeline session tables
|
||||||
|
try:
|
||||||
|
await init_ocr_pipeline_tables()
|
||||||
|
print("OCR Pipeline session tables initialized")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: OCR Pipeline tables initialization failed: {e}")
|
||||||
|
|
||||||
# Initialize database pool for DSFA RAG
|
# Initialize database pool for DSFA RAG
|
||||||
dsfa_db_pool = None
|
dsfa_db_pool = None
|
||||||
if DSFA_DATABASE_URL and set_dsfa_db_pool:
|
if DSFA_DATABASE_URL and set_dsfa_db_pool:
|
||||||
@@ -150,6 +159,7 @@ app.include_router(mail_router) # Unified Inbox Mail
|
|||||||
if trocr_router:
|
if trocr_router:
|
||||||
app.include_router(trocr_router) # TrOCR Handwriting OCR
|
app.include_router(trocr_router) # TrOCR Handwriting OCR
|
||||||
app.include_router(vocab_router) # Vocabulary Worksheet Generator
|
app.include_router(vocab_router) # Vocabulary Worksheet Generator
|
||||||
|
app.include_router(ocr_pipeline_router) # OCR Pipeline (step-by-step)
|
||||||
if dsfa_rag_router:
|
if dsfa_rag_router:
|
||||||
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search
|
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
-- OCR Pipeline Sessions - Persistent session storage
-- Applied automatically by ocr_pipeline_session_store.init_ocr_pipeline_tables()
--
-- Every statement uses IF NOT EXISTS, so the script can be re-applied without
-- failing on objects that already exist.

-- One row per pipeline session: metadata, one image blob per processing stage,
-- and one JSONB result per step.
CREATE TABLE IF NOT EXISTS ocr_pipeline_sessions (
    id UUID PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    filename VARCHAR(255),
    status VARCHAR(50) DEFAULT 'active',
    current_step INT DEFAULT 1,
    -- Stage images (presumably PNG-encoded bytes, going by the column names).
    original_png BYTEA,
    deskewed_png BYTEA,
    binarized_png BYTEA,
    dewarped_png BYTEA,
    -- Per-step results; NULL until the corresponding step has stored one.
    deskew_result JSONB,
    dewarp_result JSONB,
    column_result JSONB,
    ground_truth JSONB DEFAULT '{}',
    auto_shear_degrees FLOAT,
    -- NOTE(review): TIMESTAMP carries no time zone, and nothing in this script
    -- updates updated_at after insert — confirm the application maintains it.
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Index for listing sessions
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_created
    ON ocr_pipeline_sessions (created_at DESC);

-- Index on the status column
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_status
    ON ocr_pipeline_sessions (status);
|
||||||
809
klausur-service/backend/ocr_pipeline_api.py
Normal file
809
klausur-service/backend/ocr_pipeline_api.py
Normal file
@@ -0,0 +1,809 @@
|
|||||||
|
"""
|
||||||
|
OCR Pipeline API - Schrittweise Seitenrekonstruktion.
|
||||||
|
|
||||||
|
Zerlegt den OCR-Prozess in 7 einzelne Schritte:
|
||||||
|
1. Deskewing - Scan begradigen
|
||||||
|
2. Dewarping - Buchwoelbung entzerren
|
||||||
|
3. Spaltenerkennung - Unsichtbare Spalten finden
|
||||||
|
4. Worterkennung - OCR mit Bounding Boxes
|
||||||
|
5. Koordinatenzuweisung - Exakte Positionen
|
||||||
|
6. Seitenrekonstruktion - Seite nachbauen
|
||||||
|
7. Ground Truth Validierung - Gesamtpruefung
|
||||||
|
|
||||||
|
Lizenz: Apache 2.0
|
||||||
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import asdict
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
|
||||||
|
from fastapi.responses import Response
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from cv_vocab_pipeline import (
|
||||||
|
analyze_layout,
|
||||||
|
analyze_layout_by_words,
|
||||||
|
create_ocr_image,
|
||||||
|
deskew_image,
|
||||||
|
deskew_image_by_word_alignment,
|
||||||
|
dewarp_image,
|
||||||
|
dewarp_image_manual,
|
||||||
|
render_image_high_res,
|
||||||
|
render_pdf_high_res,
|
||||||
|
)
|
||||||
|
from ocr_pipeline_session_store import (
|
||||||
|
create_session_db,
|
||||||
|
delete_session_db,
|
||||||
|
get_session_db,
|
||||||
|
get_session_image,
|
||||||
|
init_ocr_pipeline_tables,
|
||||||
|
list_sessions_db,
|
||||||
|
update_session_db,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# In-memory cache for active sessions (BGR numpy arrays for processing)
|
||||||
|
# DB is source of truth, cache holds BGR arrays during active processing.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_cache: Dict[str, Dict[str, Any]] = {}
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_session_to_cache(session_id: str) -> Dict[str, Any]:
    """Return the in-memory cache entry for a session, building it if needed.

    The session row is always fetched first, so an id that is missing from
    the database yields a 404 even if a cache entry still exists. When no
    entry is cached, one is assembled from the row plus the stored
    "original", "deskewed" and "dewarped" images, each decoded with
    ``cv2.imdecode`` and stored under ``<type>_bgr``. A slot stays ``None``
    when no image bytes are returned for that type.

    Raises:
        HTTPException: 404 if ``get_session_db`` returns nothing.
    """
    row = await get_session_db(session_id)
    if not row:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    try:
        return _cache[session_id]
    except KeyError:
        pass

    # Values from the row take precedence over the explicit "id"; the decoded
    # image slots are then reset to None before being filled below.
    slots = {kind: f"{kind}_bgr" for kind in ("original", "deskewed", "dewarped")}
    entry: Dict[str, Any] = {"id": session_id, **row}
    entry.update(dict.fromkeys(slots.values()))

    for kind, slot in slots.items():
        raw_png = await get_session_image(session_id, kind)
        if not raw_png:
            continue
        entry[slot] = cv2.imdecode(np.frombuffer(raw_png, dtype=np.uint8), cv2.IMREAD_COLOR)

    _cache[session_id] = entry
    return entry
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cached(session_id: str) -> Dict[str, Any]:
    """Look up a session in the in-memory cache without touching the DB.

    Raises:
        HTTPException: 404 when there is no (truthy) cache entry for the id.
    """
    hit = _cache.get(session_id)
    if hit:
        return hit
    raise HTTPException(status_code=404, detail=f"Session {session_id} not in cache — reload first")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pydantic Models
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Request body carrying a single required float, `angle`.
# NOTE(review): unit and sign convention are not visible here — confirm against the handler.
class ManualDeskewRequest(BaseModel):
    angle: float
|
||||||
|
|
||||||
|
|
||||||
|
# Request body for deskew ground-truth feedback.
# `is_correct` is required; `corrected_angle` and `notes` are optional and default to None.
class DeskewGroundTruthRequest(BaseModel):
    is_correct: bool
    corrected_angle: Optional[float] = None
    notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# Request body carrying a single required float, `shear_degrees`.
# NOTE(review): valid range and sign convention are not visible here — confirm against the handler.
class ManualDewarpRequest(BaseModel):
    shear_degrees: float
|
||||||
|
|
||||||
|
|
||||||
|
# Request body for dewarp ground-truth feedback.
# `is_correct` is required; `corrected_shear` and `notes` are optional and default to None.
class DewarpGroundTruthRequest(BaseModel):
    is_correct: bool
    corrected_shear: Optional[float] = None
    notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# Request body for renaming a session: the new `name` (required string).
class RenameSessionRequest(BaseModel):
    name: str
|
||||||
|
|
||||||
|
|
||||||
|
# Request body carrying a required list of column dicts.
# NOTE(review): the per-column key schema is not defined here — confirm against the consumer.
class ManualColumnsRequest(BaseModel):
    columns: List[Dict[str, Any]]
|
||||||
|
|
||||||
|
|
||||||
|
# Request body for column-detection ground-truth feedback.
# `is_correct` is required; `corrected_columns` and `notes` are optional and default to None.
class ColumnGroundTruthRequest(BaseModel):
    is_correct: bool
    corrected_columns: Optional[List[Dict[str, Any]]] = None
    notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Session Management Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.get("/sessions")
async def list_sessions():
    """List all OCR pipeline sessions."""
    # Thin wrapper: whatever list_sessions_db() yields is returned under "sessions".
    return {"sessions": await list_sessions_db()}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions")
async def create_session(
    file: UploadFile = File(...),
    name: Optional[str] = Form(None),
):
    """Upload a PDF or image file and create a pipeline session."""
    # Flow: read upload -> render to a BGR array -> PNG-encode -> persist -> cache.
    payload = await file.read()
    upload_name = file.filename or "upload"
    mime = file.content_type or ""

    new_id = str(uuid.uuid4())

    # A file is treated as PDF by declared MIME type or by a ".pdf" suffix.
    looks_like_pdf = mime == "application/pdf" or upload_name.lower().endswith(".pdf")

    # Any rendering failure is reported to the client as a 400.
    try:
        page_bgr = (
            render_pdf_high_res(payload, page_number=0, zoom=3.0)
            if looks_like_pdf
            else render_image_high_res(payload)
        )
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Could not process file: {e}")

    # The database stores the original as PNG bytes.
    encoded_ok, encoded = cv2.imencode(".png", page_bgr)
    if not encoded_ok:
        raise HTTPException(status_code=500, detail="Failed to encode image")

    display_name = name or upload_name

    await create_session_db(
        session_id=new_id,
        name=display_name,
        filename=upload_name,
        original_png=encoded.tobytes(),
    )

    # Keep the decoded array around so the next step does not have to re-decode.
    _cache[new_id] = {
        "id": new_id,
        "filename": upload_name,
        "name": display_name,
        "original_bgr": page_bgr,
        "deskewed_bgr": None,
        "dewarped_bgr": None,
        "deskew_result": None,
        "dewarp_result": None,
        "ground_truth": {},
        "current_step": 1,
    }

    height, width = page_bgr.shape[0], page_bgr.shape[1]
    logger.info(f"OCR Pipeline: created session {new_id} from {upload_name} ({width}x{height})")

    return {
        "session_id": new_id,
        "filename": upload_name,
        "name": display_name,
        "image_width": width,
        "image_height": height,
        "original_image_url": f"/api/v1/ocr-pipeline/sessions/{new_id}/image/original",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}")
async def get_session_info(session_id: str):
    """Get session info including deskew/dewarp/column results for step navigation.

    Image dimensions are read from the stored original PNG and reported as
    0 x 0 when no original is stored or the stored bytes cannot be decoded.
    The keys ``deskew_result``, ``dewarp_result`` and ``column_result`` are
    only included when the session has a truthy value for them.

    Raises:
        HTTPException: 404 if the session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # Default to 0 x 0 and only overwrite after a successful decode.
    img_w, img_h = 0, 0
    original_png = await get_session_image(session_id, "original")
    if original_png:
        img = cv2.imdecode(np.frombuffer(original_png, dtype=np.uint8), cv2.IMREAD_COLOR)
        # cv2.imdecode returns None for data it cannot decode, so the array
        # must be checked before its shape is read.
        if img is not None:
            img_h, img_w = img.shape[:2]

    result = {
        "session_id": session["id"],
        "filename": session.get("filename", ""),
        "name": session.get("name", ""),
        "image_width": img_w,
        "image_height": img_h,
        "original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
        "current_step": session.get("current_step", 1),
    }

    # Step results are optional; omit the key entirely when nothing is stored.
    for key in ("deskew_result", "dewarp_result", "column_result"):
        if session.get(key):
            result[key] = session[key]

    return result
|
||||||
|
|
||||||
|
|
||||||
|
@router.put("/sessions/{session_id}")
async def rename_session(session_id: str, req: RenameSessionRequest):
    """Rename a session."""
    # A falsy result from update_session_db is reported as "not found".
    if await update_session_db(session_id, name=req.name):
        return {"session_id": session_id, "name": req.name}
    raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/sessions/{session_id}")
async def delete_session(session_id: str):
    """Delete a session."""
    # Drop the cache entry first; this happens even if the DB delete then reports "not found".
    if session_id in _cache:
        del _cache[session_id]

    if await delete_session_db(session_id):
        return {"session_id": session_id, "deleted": True}
    raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Image Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}/image/{image_type}")
async def get_image(session_id: str, image_type: str):
    """Serve session images: original, deskewed, dewarped, binarized, or columns-overlay.

    ``columns-overlay`` is delegated to ``_get_columns_overlay``. A cached
    ``binarized_png`` is served directly when present; every other image is
    loaded from the database.

    Raises:
        HTTPException: 400 for an unknown ``image_type``; 404 when the
            requested image has no stored data.
    """
    valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay"}
    if image_type not in valid_types:
        raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")

    if image_type == "columns-overlay":
        return await _get_columns_overlay(session_id)

    # The cache can only short-circuit the binarized image, and only when it
    # holds PNG bytes under "binarized_png".
    cached = _cache.get(session_id)
    if cached and image_type == "binarized" and cached.get("binarized_png"):
        return Response(content=cached["binarized_png"], media_type="image/png")

    # Load from DB
    data = await get_session_image(session_id, image_type)
    if not data:
        raise HTTPException(status_code=404, detail=f"Image '{image_type}' not available yet")

    return Response(content=data, media_type="image/png")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Deskew Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/deskew")
async def auto_deskew(session_id: str):
    """Deskew the original image with two methods and keep one result.

    Both the Hough-line method and the word-alignment method are run. Word
    alignment is kept when its angle magnitude is at least the Hough one, or
    when the Hough angle magnitude is below 0.1; otherwise Hough is kept.
    If the word-alignment output cannot be decoded, Hough is used instead.
    The chosen image, a binarized variant (when it can be produced) and the
    result metadata are written to the cache and the database.

    Raises:
        HTTPException: 400 when the session has no original image cached.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    entry = _get_cached(session_id)

    source_bgr = entry.get("original_bgr")
    if source_bgr is None:
        raise HTTPException(status_code=400, detail="Original image not available")

    started = time.time()

    # Candidate 1: Hough lines. On failure the unrotated image stands in.
    try:
        hough_bgr, angle_hough = deskew_image(source_bgr.copy())
    except Exception as e:
        logger.warning(f"Hough deskew failed: {e}")
        hough_bgr, angle_hough = source_bgr, 0.0

    # Candidate 2: word alignment, which works on encoded PNG bytes.
    encoded_ok, source_buf = cv2.imencode(".png", source_bgr)
    source_png = source_buf.tobytes() if encoded_ok else b""
    try:
        wa_png, angle_wa = deskew_image_by_word_alignment(source_png)
    except Exception as e:
        logger.warning(f"Word alignment deskew failed: {e}")
        wa_png, angle_wa = source_png, 0.0

    duration = time.time() - started

    # Start from Hough and switch to word alignment only when the selection
    # rule favours it AND its PNG output decodes successfully.
    method_used, angle_applied, deskewed_bgr = "hough", angle_hough, hough_bgr
    if abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1:
        decoded = cv2.imdecode(np.frombuffer(wa_png, dtype=np.uint8), cv2.IMREAD_COLOR)
        if decoded is not None:
            method_used, angle_applied, deskewed_bgr = "word_alignment", angle_wa, decoded

    png_ok, png_buf = cv2.imencode(".png", deskewed_bgr)
    deskewed_png = png_buf.tobytes() if png_ok else b""

    # Binarization is best effort; a failure leaves binarized_png as None.
    binarized_png = None
    try:
        bin_ok, bin_buf = cv2.imencode(".png", create_ocr_image(deskewed_bgr))
        if bin_ok:
            binarized_png = bin_buf.tobytes()
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")

    # Heuristic: confidence falls linearly with the applied angle, floor 0.5.
    confidence = max(0.5, 1.0 - abs(angle_applied) / 5.0)

    deskew_result = {
        "angle_hough": round(angle_hough, 3),
        "angle_word_alignment": round(angle_wa, 3),
        "angle_applied": round(angle_applied, 3),
        "method_used": method_used,
        "confidence": round(confidence, 2),
        "duration_seconds": round(duration, 2),
    }

    entry["deskewed_bgr"] = deskewed_bgr
    entry["binarized_png"] = binarized_png
    entry["deskew_result"] = deskew_result

    # The binarized column is only overwritten when a new image exists.
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
        "current_step": 2,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: deskew session {session_id}: "
                f"hough={angle_hough:.2f} wa={angle_wa:.2f} -> {method_used} {angle_applied:.2f}")

    return {
        "session_id": session_id,
        **deskew_result,
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
        "binarized_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/binarized",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/deskew/manual")
async def manual_deskew(session_id: str, req: ManualDeskewRequest):
    """Rotate the original image by a user-supplied angle.

    ``req.angle`` is clamped to the range [-5.0, 5.0]. The image is rotated
    about its centre on a canvas of unchanged size, with border pixels
    replicated. The rotated image, a binarized variant (when it can be
    produced) and the updated deskew metadata are stored in the cache and
    the database.

    Raises:
        HTTPException: 400 when the session has no original image cached.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    img_bgr = cached.get("original_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Original image not available")

    angle = max(-5.0, min(5.0, req.angle))

    h, w = img_bgr.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(img_bgr, M, (w, h),
                             flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_REPLICATE)

    success, png_buf = cv2.imencode(".png", rotated)
    deskewed_png = png_buf.tobytes() if success else b""

    # Binarization stays best effort, but a failure is logged (as the
    # automatic deskew endpoint does) instead of being silently dropped.
    binarized_png = None
    try:
        binarized = create_ocr_image(rotated)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")

    # Keep any earlier metadata and override only what changed.
    deskew_result = {
        **(cached.get("deskew_result") or {}),
        "angle_applied": round(angle, 3),
        "method_used": "manual",
    }

    cached["deskewed_bgr"] = rotated
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result

    # The binarized column is only overwritten when a new image exists.
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}")

    return {
        "session_id": session_id,
        "angle_applied": round(angle, 3),
        "method_used": "manual",
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/ground-truth/deskew")
async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthRequest):
    """Store reviewer feedback for the deskew step.

    The feedback is saved under the ``deskew`` key of the session's ground
    truth, together with the deskew result stored at that moment.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    feedback = {
        "is_correct": req.is_correct,
        "corrected_angle": req.corrected_angle,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),
        "deskew_result": session.get("deskew_result"),
    }

    # Merge into whatever ground truth other steps have already recorded.
    merged = {**(session.get("ground_truth") or {}), "deskew": feedback}
    await update_session_db(session_id, ground_truth=merged)

    # Mirror the change into the cache when the session is loaded.
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = merged

    logger.info(f"OCR Pipeline: ground truth deskew session {session_id}: "
                f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")

    return {"session_id": session_id, "ground_truth": feedback}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Dewarp Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/dewarp")
async def auto_dewarp(session_id: str):
    """Run automatic dewarping on the deskewed image and store the result.

    The dewarped image and the metadata reported by ``dewarp_image``
    (method, shear in degrees, confidence) are written to the cache and the
    database, and the session's ``current_step`` is set to 3.

    Raises:
        HTTPException: 400 when no deskewed image is cached for the session.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    entry = _get_cached(session_id)

    source_bgr = entry.get("deskewed_bgr")
    if source_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    started = time.time()
    dewarped_bgr, dewarp_info = dewarp_image(source_bgr)
    duration = time.time() - started

    encoded_ok, encoded = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = encoded.tobytes() if encoded_ok else b""

    method = dewarp_info["method"]
    shear = dewarp_info["shear_degrees"]
    confidence = dewarp_info["confidence"]

    dewarp_result = {
        "method_used": method,
        "shear_degrees": shear,
        "confidence": confidence,
        "duration_seconds": round(duration, 2),
    }

    entry["dewarped_bgr"] = dewarped_bgr
    entry["dewarp_result"] = dewarp_result

    await update_session_db(
        session_id,
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
        auto_shear_degrees=dewarp_info.get("shear_degrees", 0.0),
        current_step=3,
    )

    logger.info(f"OCR Pipeline: dewarp session {session_id}: "
                f"method={method} shear={shear:.3f} "
                f"conf={confidence:.2f} ({duration:.2f}s)")

    return {
        "session_id": session_id,
        **dewarp_result,
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/dewarp/manual")
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
    """Apply a user-supplied shear correction to the deskewed image.

    ``req.shear_degrees`` is clamped to [-2.0, 2.0]. A shear whose magnitude
    is below 0.001 leaves the deskewed image unchanged. The result is stored
    in the cache and the database.

    Raises:
        HTTPException: 400 when no deskewed image is cached for the session.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    entry = _get_cached(session_id)

    source_bgr = entry.get("deskewed_bgr")
    if source_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    shear_deg = max(-2.0, min(2.0, req.shear_degrees))

    # A negligible shear is treated as "no correction".
    dewarped_bgr = (
        source_bgr
        if abs(shear_deg) < 0.001
        else dewarp_image_manual(source_bgr, shear_deg)
    )

    encoded_ok, encoded = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = encoded.tobytes() if encoded_ok else b""

    # Keep any earlier metadata and override only what changed.
    dewarp_result = dict(entry.get("dewarp_result") or {})
    dewarp_result["method_used"] = "manual"
    dewarp_result["shear_degrees"] = round(shear_deg, 3)

    entry["dewarped_bgr"] = dewarped_bgr
    entry["dewarp_result"] = dewarp_result

    await update_session_db(
        session_id,
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
    )

    logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}")

    return {
        "session_id": session_id,
        "shear_degrees": round(shear_deg, 3),
        "method_used": "manual",
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/ground-truth/dewarp")
async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthRequest):
    """Store reviewer feedback for the dewarp step.

    The feedback is saved under the ``dewarp`` key of the session's ground
    truth, together with the dewarp result stored at that moment.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    feedback = {
        "is_correct": req.is_correct,
        "corrected_shear": req.corrected_shear,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),
        "dewarp_result": session.get("dewarp_result"),
    }

    # Merge into whatever ground truth other steps have already recorded.
    merged = {**(session.get("ground_truth") or {}), "dewarp": feedback}
    await update_session_db(session_id, ground_truth=merged)

    # Mirror the change into the cache when the session is loaded.
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = merged

    logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
                f"correct={req.is_correct}, corrected_shear={req.corrected_shear}")

    return {"session_id": session_id, "ground_truth": feedback}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Column Detection Endpoints (Step 3)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/columns")
async def detect_columns(session_id: str):
    """Run column detection on the dewarped image and store the result.

    The detected regions are converted to dicts, saved together with the
    classification methods that occurred and the run time, and returned.

    Raises:
        HTTPException: 400 when no dewarped image is cached for the session.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    dewarped_bgr = cached.get("dewarped_bgr")
    if dewarped_bgr is None:
        raise HTTPException(status_code=400, detail="Dewarp must be completed before column detection")

    t0 = time.time()

    # The layout analysis takes both the OCR-prepared and the colour image.
    ocr_img = create_ocr_image(dewarped_bgr)
    regions = analyze_layout_by_words(ocr_img, dewarped_bgr)
    duration = time.time() - t0

    columns = [asdict(r) for r in regions]

    # Distinct, non-empty classification methods. Sorted so the stored and
    # returned list has a stable order (a bare set() does not guarantee one).
    methods = sorted({
        c["classification_method"] for c in columns
        if c.get("classification_method")
    })

    column_result = {
        "columns": columns,
        "classification_methods": methods,
        "duration_seconds": round(duration, 2),
    }

    await update_session_db(
        session_id,
        column_result=column_result,
        current_step=3,
    )

    cached["column_result"] = column_result

    # Only regions whose type starts with "column" count as columns here.
    col_count = sum(1 for c in columns if c["type"].startswith("column"))
    logger.info(f"OCR Pipeline: columns session {session_id}: "
                f"{col_count} columns detected ({duration:.2f}s)")

    return {
        "session_id": session_id,
        **column_result,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/columns/manual")
async def set_manual_columns(session_id: str, req: ManualColumnsRequest):
    """Replace the stored column result with manually defined columns.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    column_result = {
        "columns": req.columns,
        "duration_seconds": 0,
        "method": "manual",
    }

    # update_session_db returns None when no row matched the id; report that
    # as 404 (like the other session endpoints) instead of claiming success.
    updated = await update_session_db(session_id, column_result=column_result)
    if updated is None:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # Mirror the change into the cache when the session is loaded.
    if session_id in _cache:
        _cache[session_id]["column_result"] = column_result

    logger.info(f"OCR Pipeline: manual columns session {session_id}: "
                f"{len(req.columns)} columns set")

    return {"session_id": session_id, **column_result}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/ground-truth/columns")
async def save_column_ground_truth(session_id: str, req: ColumnGroundTruthRequest):
    """Store reviewer feedback for the column detection step.

    The feedback is saved under the ``columns`` key of the session's ground
    truth, together with the column result stored at that moment.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    feedback = {
        "is_correct": req.is_correct,
        "corrected_columns": req.corrected_columns,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),
        "column_result": session.get("column_result"),
    }

    # Merge into whatever ground truth other steps have already recorded.
    merged = {**(session.get("ground_truth") or {}), "columns": feedback}
    await update_session_db(session_id, ground_truth=merged)

    # Mirror the change into the cache when the session is loaded.
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = merged

    return {"session_id": session_id, "ground_truth": feedback}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}/ground-truth/columns")
async def get_column_ground_truth(session_id: str):
    """Return the saved column ground truth next to the stored column result.

    No diff is computed here; the caller receives both values and compares
    them itself.

    Raises:
        HTTPException: 404 when the session does not exist or has no column
            ground truth.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    columns_gt = (session.get("ground_truth") or {}).get("columns")
    if not columns_gt:
        raise HTTPException(status_code=404, detail="No column ground truth saved")

    payload = {"session_id": session_id}
    payload["columns_gt"] = columns_gt
    payload["columns_auto"] = session.get("column_result")
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_columns_overlay(session_id: str) -> Response:
    """Render the dewarped image with the stored column regions drawn on it.

    Each region gets a solid border, a type label (with a confidence
    percentage when the confidence is below 1.0) and a fill blended in at
    20% opacity.

    Raises:
        HTTPException: 404 when the session, its column data or its dewarped
            image is missing; 500 when the image cannot be decoded or the
            overlay cannot be encoded.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    column_result = session.get("column_result")
    if not column_result or not column_result.get("columns"):
        raise HTTPException(status_code=404, detail="No column data available")

    dewarped_png = await get_session_image(session_id, "dewarped")
    if not dewarped_png:
        raise HTTPException(status_code=404, detail="Dewarped image not available")

    arr = np.frombuffer(dewarped_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    # Colour per region type, in OpenCV's BGR channel order.
    colors = {
        "column_en": (255, 180, 0),       # Blue
        "column_de": (0, 200, 0),         # Green
        "column_example": (0, 140, 255),  # Orange
        "column_text": (200, 200, 0),     # Cyan/Turquoise
        "page_ref": (200, 0, 200),        # Purple
        "column_marker": (0, 0, 220),     # Red
        "column_ignore": (180, 180, 180), # Light Gray
        "header": (128, 128, 128),        # Gray
        "footer": (128, 128, 128),        # Gray
    }
    default_color = (200, 200, 200)

    # Fills go onto a copy so they can be blended in; borders and labels are
    # drawn directly on the image.
    overlay = img.copy()
    for col in column_result["columns"]:
        # OpenCV drawing calls need integer pixel coordinates, but manually
        # supplied columns may carry JSON floats, so coerce explicitly.
        x, y = int(round(col["x"])), int(round(col["y"]))
        w, h = int(round(col["width"])), int(round(col["height"]))

        # A missing or null type falls back to the default colour and label.
        region_type = col.get("type") or "unknown"
        color = colors.get(region_type, default_color)

        cv2.rectangle(overlay, (x, y), (x + w, y + h), color, -1)
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)

        label = region_type.replace("column_", "").upper()
        conf = col.get("classification_confidence")
        if conf is not None and conf < 1.0:
            label = f"{label} {int(conf * 100)}%"
        cv2.putText(img, label, (x + 10, y + 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    # Blend the filled copy over the annotated image at 20% opacity.
    cv2.addWeighted(overlay, 0.2, img, 0.8, 0, img)

    success, result_png = cv2.imencode(".png", img)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")

    return Response(content=result_png.tobytes(), media_type="image/png")
|
||||||
228
klausur-service/backend/ocr_pipeline_session_store.py
Normal file
228
klausur-service/backend/ocr_pipeline_session_store.py
Normal file
@@ -0,0 +1,228 @@
|
|||||||
|
"""
|
||||||
|
OCR Pipeline Session Store - PostgreSQL persistence for OCR pipeline sessions.
|
||||||
|
|
||||||
|
Replaces in-memory storage with database persistence.
|
||||||
|
See migrations/002_ocr_pipeline_sessions.sql for schema.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from typing import Optional, List, Dict, Any
|
||||||
|
|
||||||
|
import asyncpg
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Database configuration (same as vocab_session_store)
|
||||||
|
DATABASE_URL = os.getenv(
|
||||||
|
"DATABASE_URL",
|
||||||
|
"postgresql://breakpilot:breakpilot@postgres:5432/breakpilot_db"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Connection pool (initialized lazily)
|
||||||
|
_pool: Optional[asyncpg.Pool] = None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_pool() -> asyncpg.Pool:
    """Return the shared asyncpg pool, creating it on first use.

    The pool is built from ``DATABASE_URL`` with between 2 and 10
    connections and kept in the module-level ``_pool`` variable.
    """
    global _pool
    if _pool is not None:
        return _pool
    _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
    return _pool
|
||||||
|
|
||||||
|
|
||||||
|
async def init_ocr_pipeline_tables():
    """Create the OCR pipeline tables when they do not exist yet.

    If ``ocr_pipeline_sessions`` is not listed in ``information_schema``,
    the SQL in ``migrations/002_ocr_pipeline_sessions.sql`` (next to this
    module) is executed. A missing migration file is logged as a warning and
    otherwise ignored.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        tables_exist = await conn.fetchval("""
            SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = 'ocr_pipeline_sessions'
            )
        """)

        if tables_exist:
            logger.debug("OCR pipeline tables already exist")
            return

        logger.info("Creating OCR pipeline tables...")
        migration_path = os.path.join(
            os.path.dirname(__file__),
            "migrations/002_ocr_pipeline_sessions.sql"
        )
        if not os.path.exists(migration_path):
            logger.warning(f"Migration file not found: {migration_path}")
            return

        # Read with an explicit encoding so the result does not depend on the
        # platform's default locale.
        with open(migration_path, "r", encoding="utf-8") as f:
            sql = f.read()
        await conn.execute(sql)
        logger.info("OCR pipeline tables created successfully")
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# SESSION CRUD
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
async def create_session_db(
    session_id: str,
    name: str,
    filename: str,
    original_png: bytes,
) -> Dict[str, Any]:
    """Insert a new OCR pipeline session and return its metadata.

    The row is created with status ``active`` and ``current_step`` 1. The
    returned dict holds the metadata columns only, not the image bytes.
    """
    insert_sql = """
        INSERT INTO ocr_pipeline_sessions (
            id, name, filename, original_png, status, current_step
        ) VALUES ($1, $2, $3, $4, 'active', 1)
        RETURNING id, name, filename, status, current_step,
                  deskew_result, dewarp_result, column_result,
                  ground_truth, auto_shear_degrees,
                  created_at, updated_at
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            insert_sql, uuid.UUID(session_id), name, filename, original_png
        )
    return _row_to_dict(record)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a session's metadata (no image columns).

    Returns:
        The session as a dict, or ``None`` when no row has this id.
    """
    select_sql = """
        SELECT id, name, filename, status, current_step,
               deskew_result, dewarp_result, column_result,
               ground_truth, auto_shear_degrees,
               created_at, updated_at
        FROM ocr_pipeline_sessions WHERE id = $1
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(select_sql, uuid.UUID(session_id))
    return _row_to_dict(record) if record else None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_session_image(session_id: str, image_type: str) -> Optional[bytes]:
    """Load one stored image of a session.

    Args:
        session_id: Session UUID as a string.
        image_type: One of ``original``, ``deskewed``, ``binarized`` or
            ``dewarped``.

    Returns:
        The value of the matching image column, or ``None`` for an
        unrecognised ``image_type`` (no query is issued in that case).
    """
    # The column name is interpolated into the SQL below, so it must only
    # ever come from this fixed mapping.
    column = {
        "original": "original_png",
        "deskewed": "deskewed_png",
        "binarized": "binarized_png",
        "dewarped": "dewarped_png",
    }.get(image_type)
    if not column:
        return None

    pool = await get_pool()
    async with pool.acquire() as conn:
        query = f"SELECT {column} FROM ocr_pipeline_sessions WHERE id = $1"
        return await conn.fetchval(query, uuid.UUID(session_id))
|
||||||
|
|
||||||
|
|
||||||
|
async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any]]:
    """Update selected columns of a session.

    Keyword arguments that are not in the allowed column list are ignored.
    Values for the JSONB columns are serialised with ``json.dumps`` unless
    they are ``None`` or already a string. ``updated_at`` is refreshed on
    every real update.

    Returns:
        The updated session metadata, or ``None`` when no row has this id.
        With no recognised keyword argument, the current session is
        returned unchanged.
    """
    pool = await get_pool()

    allowed_fields = {
        'name', 'filename', 'status', 'current_step',
        'original_png', 'deskewed_png', 'binarized_png', 'dewarped_png',
        'deskew_result', 'dewarp_result', 'column_result',
        'ground_truth', 'auto_shear_degrees',
    }
    jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'ground_truth'}

    accepted = [(col, val) for col, val in kwargs.items() if col in allowed_fields]
    if not accepted:
        return await get_session_db(session_id)

    # Column names come from the fixed set above; values always travel as
    # numbered query parameters.
    assignments = []
    params = []
    for position, (col, val) in enumerate(accepted, start=1):
        assignments.append(f"{col} = ${position}")
        needs_json = col in jsonb_fields and val is not None and not isinstance(val, str)
        params.append(json.dumps(val) if needs_json else val)

    assignments.append("updated_at = NOW()")

    # The session id is the last parameter, after all column values.
    id_position = len(accepted) + 1
    params.append(uuid.UUID(session_id))

    async with pool.acquire() as conn:
        record = await conn.fetchrow(f"""
            UPDATE ocr_pipeline_sessions
            SET {', '.join(assignments)}
            WHERE id = ${id_position}
            RETURNING id, name, filename, status, current_step,
                      deskew_result, dewarp_result, column_result,
                      ground_truth, auto_shear_degrees,
                      created_at, updated_at
        """, *params)

    return _row_to_dict(record) if record else None
|
||||||
|
|
||||||
|
|
||||||
|
async def list_sessions_db(limit: int = 50) -> List[Dict[str, Any]]:
    """Return up to ``limit`` sessions, newest first (metadata only)."""
    list_sql = """
        SELECT id, name, filename, status, current_step,
               created_at, updated_at
        FROM ocr_pipeline_sessions
        ORDER BY created_at DESC
        LIMIT $1
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(list_sql, limit)

    sessions = []
    for record in records:
        sessions.append(_row_to_dict(record))
    return sessions
|
||||||
|
|
||||||
|
|
||||||
|
async def delete_session_db(session_id: str) -> bool:
    """Delete a session row.

    Returns:
        ``True`` when the command status is ``"DELETE 1"``, i.e. exactly
        one row was removed; ``False`` otherwise.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status_tag = await conn.execute("""
            DELETE FROM ocr_pipeline_sessions WHERE id = $1
        """, uuid.UUID(session_id))
    return status_tag == "DELETE 1"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# HELPER
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
    """Turn a database record into a JSON-serialisable dict.

    UUID columns become strings, timestamps become ISO-8601 strings, and
    JSONB columns that arrive as text are parsed. ``None`` yields an empty
    dict.
    """
    if row is None:
        return {}

    data = dict(row)

    # UUID -> string
    for column in ('id', 'session_id'):
        if data.get(column) is not None:
            data[column] = str(data[column])

    # datetime -> ISO string
    for column in ('created_at', 'updated_at'):
        stamp = data.get(column)
        if stamp is not None:
            data[column] = stamp.isoformat()

    # JSONB -> parsed; only values that are still text need decoding.
    for column in ('deskew_result', 'dewarp_result', 'column_result', 'ground_truth'):
        raw = data.get(column)
        if isinstance(raw, str):
            data[column] = json.loads(raw)

    return data
|
||||||
@@ -31,8 +31,8 @@ WORKDIR /app
|
|||||||
ENV NODE_ENV=production
|
ENV NODE_ENV=production
|
||||||
|
|
||||||
# Create non-root user
|
# Create non-root user
|
||||||
RUN addgroup -S -g 1001 nodejs
|
RUN addgroup --system --gid 1001 nodejs
|
||||||
RUN adduser -S -u 1001 -G nodejs nextjs
|
RUN adduser --system --uid 1001 nextjs
|
||||||
|
|
||||||
# Copy built application
|
# Copy built application
|
||||||
COPY --from=builder /app/public ./public
|
COPY --from=builder /app/public ./public
|
||||||
|
|||||||
@@ -34,8 +34,8 @@ WORKDIR /app
|
|||||||
ENV NODE_ENV=production
|
ENV NODE_ENV=production
|
||||||
|
|
||||||
# Create non-root user
|
# Create non-root user
|
||||||
RUN addgroup -S -g 1001 nodejs
|
RUN addgroup --system --gid 1001 nodejs
|
||||||
RUN adduser -S -u 1001 -G nodejs nextjs
|
RUN adduser --system --uid 1001 nextjs
|
||||||
|
|
||||||
# Copy built assets
|
# Copy built assets
|
||||||
COPY --from=builder /app/public ./public
|
COPY --from=builder /app/public ./public
|
||||||
|
|||||||
Reference in New Issue
Block a user