Compare commits
415 Commits
coolify
...
f31a7175a2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f31a7175a2 | ||
|
|
bacbfd88f1 | ||
|
|
2c63beff04 | ||
|
|
82433b4bad | ||
|
|
d889a6959e | ||
|
|
bc1804ad18 | ||
|
|
45b83560fd | ||
|
|
e4fa634a63 | ||
|
|
76ba83eecb | ||
|
|
04092a0a66 | ||
|
|
7fafd297e7 | ||
|
|
7ac09b5941 | ||
|
|
1f7989cfc2 | ||
|
|
ef5aed6a98 | ||
|
|
7dc00e737a | ||
|
|
a579c31ddb | ||
|
|
0f9c0d2ad0 | ||
|
|
278067fe20 | ||
|
|
d76fb2a9c8 | ||
|
|
9681fcbd05 | ||
|
|
4290f70885 | ||
|
|
5c935eec23 | ||
|
|
c4a5cd2d8a | ||
|
|
bc5ab29c06 | ||
|
|
7c5d95b858 | ||
|
|
65059471cf | ||
|
|
58c9565ba5 | ||
|
|
92a7b85c2d | ||
|
|
5f89913a9a | ||
|
|
3c7fc43f43 | ||
|
|
6bfa9eed86 | ||
|
|
7750b2a05f | ||
|
|
e3395ae8cf | ||
|
|
df30d4eae3 | ||
|
|
2e6ab3a646 | ||
|
|
cc5ee74921 | ||
|
|
21d37b5da1 | ||
|
|
19cbbf310a | ||
|
|
fc0ab84e40 | ||
|
|
050d410ba0 | ||
|
|
038eaf783c | ||
|
|
432eee3694 | ||
|
|
8e4cbd84c2 | ||
|
|
f9d71d50d1 | ||
|
|
c09838e91c | ||
|
|
3fd6523872 | ||
|
|
e56391b0c3 | ||
|
|
a3e2a7f994 | ||
|
|
f655db30e4 | ||
|
|
c894a0feeb | ||
|
|
8ef4c089cf | ||
|
|
821e5481c2 | ||
|
|
b98ea33a3a | ||
|
|
f139d0903e | ||
|
|
962bbbe9f6 | ||
|
|
9da45c2a59 | ||
|
|
64447ad352 | ||
|
|
00cbf266cb | ||
|
|
f9bad7beaa | ||
|
|
143e41ec76 | ||
|
|
ec287fd12e | ||
|
|
98f7f7d7d5 | ||
|
|
a19bca6060 | ||
|
|
7a76697f95 | ||
|
|
5359a4cc2b | ||
|
|
a25214126d | ||
|
|
fd79d5e4fa | ||
|
|
19b93f7762 | ||
|
|
a079ffe8e9 | ||
|
|
6e1d715d0d | ||
|
|
d66efdecf5 | ||
|
|
d36972b464 | ||
|
|
f30e526917 | ||
|
|
438a4495c7 | ||
|
|
902de027f4 | ||
|
|
b1cdb2531c | ||
|
|
ab30e8b17a | ||
|
|
b0e1fbc8d6 | ||
|
|
872b47f691 | ||
|
|
bbf0a5720e | ||
|
|
29d3c1caf5 | ||
|
|
aae8a96aa2 | ||
|
|
2b73d9beec | ||
|
|
324f39a9cc | ||
|
|
febd0a2f84 | ||
|
|
43b1f8be58 | ||
|
|
43dec5dd91 | ||
|
|
dfce8415d7 | ||
|
|
92a52a3199 | ||
|
|
427fecdce0 | ||
|
|
9fb3229270 | ||
|
|
91625a2646 | ||
|
|
02ae6249ca | ||
|
|
cf995f2d52 | ||
|
|
0340204c1f | ||
|
|
729ebff63c | ||
|
|
6668661895 | ||
|
|
eeee61108a | ||
|
|
1653e7cff4 | ||
|
|
86ae71fd65 | ||
|
|
ba513968c5 | ||
|
|
f717e1c0df | ||
|
|
934b5648a2 | ||
|
|
fe7339c7a1 | ||
|
|
3aa4a63257 | ||
|
|
6b9b280ba3 | ||
|
|
1d34785e2b | ||
|
|
5b5213c2b9 | ||
|
|
fbbec6cf5e | ||
|
|
a6951940b9 | ||
|
|
4a8d43fd71 | ||
|
|
bcd55e12d7 | ||
|
|
2bd63ec402 | ||
|
|
39a4d8564c | ||
|
|
1162eac7b4 | ||
|
|
28352f5bab | ||
|
|
c3f1547e32 | ||
|
|
4a15d46dfd | ||
|
|
b83b38e7f2 | ||
|
|
a994ddee83 | ||
|
|
c2c082d4b4 | ||
|
|
d6f51e4418 | ||
|
|
703e110bab | ||
|
|
41ff7671cd | ||
|
|
8e42e36ee4 | ||
|
|
24e1e93b5b | ||
|
|
846292f632 | ||
|
|
4280298e02 | ||
|
|
4f2fb0e94c | ||
|
|
61c8169f9e | ||
|
|
e9ccd1e35c | ||
|
|
d335a7bbf3 | ||
|
|
1f527fcd49 | ||
|
|
8349c28f54 | ||
|
|
71a1b5f058 | ||
|
|
c743a38eaf | ||
|
|
90c1efd9b0 | ||
|
|
06d63d18f9 | ||
|
|
3e65b14b83 | ||
|
|
40ac593d28 | ||
|
|
ea69239e06 | ||
|
|
bb90d1ba94 | ||
|
|
685d135be5 | ||
|
|
e2c2acdf86 | ||
|
|
3cc496f7f3 | ||
|
|
a6069631cc | ||
|
|
ced5bb3dd3 | ||
|
|
2fdf3ff868 | ||
|
|
2e21a4b6d0 | ||
|
|
d98dba9098 | ||
|
|
cd13eca290 | ||
|
|
aa7db43f02 | ||
|
|
4afd5bd8e8 | ||
|
|
7d19145edb | ||
|
|
35f2706098 | ||
|
|
0ee92e7210 | ||
|
|
4949863bd7 | ||
|
|
efbe15f895 | ||
|
|
c3da131129 | ||
|
|
b81baa1d16 | ||
|
|
2010cab894 | ||
|
|
bc13978bc1 | ||
|
|
2f51ac617f | ||
|
|
8a5f2aa188 | ||
|
|
d182d87f26 | ||
|
|
87efc1b4ba | ||
|
|
dd7087cd6d | ||
|
|
7282a220d6 | ||
|
|
b5d5371f72 | ||
|
|
41e47baf13 | ||
|
|
8a60f4bf30 | ||
|
|
e3ee1de790 | ||
|
|
b91f799ccf | ||
|
|
2df2a01a8b | ||
|
|
e2ad93fd57 | ||
|
|
2cbdfc56f3 | ||
|
|
840918df2a | ||
|
|
eb3fc05cdc | ||
|
|
9dbb5fa708 | ||
|
|
f468c30112 | ||
|
|
618c82ef42 | ||
|
|
080fcb5e3c | ||
|
|
bcd97e7d78 | ||
|
|
7f8615b8c1 | ||
|
|
2055597ba4 | ||
|
|
ad28f9420a | ||
|
|
6314e60464 | ||
|
|
d530738b12 | ||
|
|
ca7d44e543 | ||
|
|
e44e319ccf | ||
|
|
6bb023bdc1 | ||
|
|
13553fc5e6 | ||
|
|
964c916a81 | ||
|
|
13510b62cc | ||
|
|
3a791179af | ||
|
|
f65bd11919 | ||
|
|
785b4d7655 | ||
|
|
2716495250 | ||
|
|
23b7840ea7 | ||
|
|
34adb437d0 | ||
|
|
ceaef9c6a6 | ||
|
|
9047339f0d | ||
|
|
2592ef233b | ||
|
|
256efef3ea | ||
|
|
4610137ecc | ||
|
|
fb46450802 | ||
|
|
11126c4436 | ||
|
|
7a0ded7562 | ||
|
|
04be24a89e | ||
|
|
cf9dde9876 | ||
|
|
60c4138660 | ||
|
|
7005b18561 | ||
|
|
e60254bc75 | ||
|
|
156a818246 | ||
|
|
eb45bb4879 | ||
|
|
2763631711 | ||
|
|
9a5a35bff1 | ||
|
|
931ab92c92 | ||
|
|
853638b03c | ||
|
|
d98359fceb | ||
|
|
e1ae5d5fa9 | ||
|
|
4e8ea77140 | ||
|
|
e8ba5ec073 | ||
|
|
02631dc4e0 | ||
|
|
a5635e0c43 | ||
|
|
7a1bd5e82d | ||
|
|
b0bfc0a960 | ||
|
|
a5df2b6e15 | ||
|
|
14c8bb5da0 | ||
|
|
4532f68173 | ||
|
|
391449fedf | ||
|
|
cb2b924a7b | ||
|
|
8f3a50b981 | ||
|
|
0f821afb23 | ||
|
|
2ad391e4e4 | ||
|
|
e0decac7a0 | ||
|
|
d39d249daa | ||
|
|
538d5c732e | ||
|
|
b9c3c47a37 | ||
|
|
9912997187 | ||
|
|
2ec4d8aabd | ||
|
|
24366880ad | ||
|
|
20b341d839 | ||
|
|
d5be7b6f77 | ||
|
|
b7ae36e92b | ||
|
|
9ea77ba157 | ||
|
|
4f9cf3b9e8 | ||
|
|
b8a9493310 | ||
|
|
68a6b97654 | ||
|
|
af1b12c97d | ||
|
|
770aea611f | ||
|
|
1a2efbf075 | ||
|
|
cd12755da6 | ||
|
|
40cfc1acdd | ||
|
|
aa136a9f80 | ||
|
|
e6858010c2 | ||
|
|
1cc69d6b5e | ||
|
|
293e7914d8 | ||
|
|
a58dfca1d8 | ||
|
|
fd99d4f875 | ||
|
|
1e0c6bb4b5 | ||
|
|
e6dc3fcdd7 | ||
|
|
edbdac3203 | ||
|
|
99573a46ef | ||
|
|
6ad4b84584 | ||
|
|
f94a3836f8 | ||
|
|
34c649c8be | ||
|
|
dd16c88007 | ||
|
|
9cbf0fb278 | ||
|
|
90ecb46bed | ||
|
|
bb0e23303c | ||
|
|
604da26b24 | ||
|
|
113a1c10e5 | ||
|
|
e4bdb3cc24 | ||
|
|
d0e7966925 | ||
|
|
68d230c297 | ||
|
|
16dc77e5c2 | ||
|
|
29c74a9962 | ||
|
|
00a74b3144 | ||
|
|
489835a279 | ||
|
|
f0726d9a2b | ||
|
|
ae1f9f7494 | ||
|
|
e4aff2b27e | ||
|
|
9dd77ab54a | ||
|
|
e426de937c | ||
|
|
0d3f001acb | ||
|
|
c484a89b78 | ||
|
|
d5f2ce4659 | ||
|
|
ab3ecc7c08 | ||
|
|
970ec1f548 | ||
|
|
a610bc75ba | ||
|
|
153f41358b | ||
|
|
d1c8075da2 | ||
|
|
f3d61a9394 | ||
|
|
ab2423bd10 | ||
|
|
b914b6f49d | ||
|
|
123b7ada0b | ||
|
|
cb61fab77b | ||
|
|
6623a5d10e | ||
|
|
21ea458fcf | ||
|
|
b1f7fee284 | ||
|
|
dc5d76ecf5 | ||
|
|
1ac47cd9b7 | ||
|
|
fa8e38db2d | ||
|
|
f1b6246838 | ||
|
|
2fce92d7b1 | ||
|
|
7eb03ca8d1 | ||
|
|
50e1c964ee | ||
|
|
2e0f8632f8 | ||
|
|
606bef0591 | ||
|
|
ccba2bb887 | ||
|
|
75bca1f02d | ||
|
|
4d428980c1 | ||
|
|
dea3349b23 | ||
|
|
0d72f2c836 | ||
|
|
d6a8c1d821 | ||
|
|
6527beae03 | ||
|
|
3904ddb493 | ||
|
|
6e1a349eed | ||
|
|
7252f9a956 | ||
|
|
f13116345b | ||
|
|
991984d9c3 | ||
|
|
1a246eb059 | ||
|
|
0532b2a797 | ||
|
|
f1fcc67357 | ||
|
|
c8981423d4 | ||
|
|
f615c5f66d | ||
|
|
a052f73de3 | ||
|
|
34ccdd5fd1 | ||
|
|
e718353d9f | ||
|
|
c3a924a620 | ||
|
|
650f15bc1b | ||
|
|
40a77a82f6 | ||
|
|
87931c35e4 | ||
|
|
29b1d95acc | ||
|
|
dbf0db0c13 | ||
|
|
2a493890b6 | ||
|
|
e171a736e7 | ||
|
|
938d1d69cf | ||
|
|
e9f368d3ec | ||
|
|
3028f421b4 | ||
|
|
2b1c499d54 | ||
|
|
72cc77dcf4 | ||
|
|
e3f939a628 | ||
|
|
6bca3370e0 | ||
|
|
befc44d2dd | ||
|
|
6db3c02db4 | ||
|
|
8f2c2e8f68 | ||
|
|
50ad06f43a | ||
|
|
2c4160e4c4 | ||
|
|
9bbde1c03e | ||
|
|
77869e32f4 | ||
|
|
89b5f49918 | ||
|
|
7f27783008 | ||
|
|
a666e883da | ||
|
|
27b895a848 | ||
|
|
3bcb7aa638 | ||
|
|
c4f2e6554e | ||
|
|
8e861e5a4d | ||
|
|
4970ca903e | ||
|
|
97d4355aa9 | ||
|
|
8ad5823fd8 | ||
|
|
ec47045c15 | ||
|
|
ba65e47654 | ||
|
|
8507e2e035 | ||
|
|
854d8b431b | ||
|
|
f2521d2b9e | ||
|
|
954d21e469 | ||
|
|
010616be5a | ||
|
|
e3aa8e899e | ||
|
|
266b9dfad3 | ||
|
|
ab294d5a6f | ||
|
|
b48cd8bb46 | ||
|
|
d481e0087b | ||
|
|
f7e0f2bb4f | ||
|
|
e7fb9d59f1 | ||
|
|
859342300e | ||
|
|
8c42fefa77 | ||
|
|
984dfab975 | ||
|
|
45435f226f | ||
|
|
4ec7c20490 | ||
|
|
17604b8eb2 | ||
|
|
f39314fb27 | ||
|
|
356d39d6ee | ||
|
|
491df4e1b0 | ||
|
|
954103cdf2 | ||
|
|
47dc2e6f7a | ||
|
|
203b3c0e2d | ||
|
|
b58aecd081 | ||
|
|
04b83d5f46 | ||
|
|
c7ae44ff17 | ||
|
|
ce0815007e | ||
|
|
b03cb0a1e6 | ||
|
|
5a45cbf605 | ||
|
|
164b35c06a | ||
|
|
2297f66edb | ||
|
|
db8327f039 | ||
|
|
587b066a40 | ||
|
|
03fa186fec | ||
|
|
1040729874 | ||
|
|
4f37afa222 | ||
|
|
bb879a03a8 | ||
|
|
f535d3c967 | ||
|
|
7a3570fe46 | ||
|
|
1393a994f9 | ||
|
|
cf27a95308 | ||
|
|
aa06ae0f61 | ||
|
|
09b820efbe | ||
|
|
ff2bb79a91 | ||
|
|
fb496c5e34 | ||
|
|
9df745574b | ||
|
|
44e8c573af | ||
|
|
589d2f811a | ||
|
|
d552fd8b6b | ||
|
|
e7b6654b85 |
@@ -6,22 +6,31 @@
|
||||
|
||||
| Geraet | Rolle | Aufgaben |
|
||||
|--------|-------|----------|
|
||||
| **MacBook** | Client | Claude Terminal, Browser (Frontend-Tests) |
|
||||
| **Mac Mini** | Server | Docker, alle Services, Code-Ausfuehrung, Tests, Git |
|
||||
| **MacBook** | Entwicklung | Claude Terminal, Code-Entwicklung, Browser (Frontend-Tests) |
|
||||
| **Mac Mini** | Server | Docker, alle Services, Tests, Builds, Deployment |
|
||||
|
||||
**WICHTIG:** Die Entwicklung findet vollstaendig auf dem **Mac Mini** statt!
|
||||
**WICHTIG:** Code wird direkt auf dem MacBook in diesem Repo bearbeitet. Docker und Services laufen auf dem Mac Mini.
|
||||
|
||||
### SSH-Verbindung
|
||||
### Entwicklungsworkflow
|
||||
|
||||
```bash
|
||||
ssh macmini
|
||||
# Projektverzeichnis:
|
||||
cd /Users/benjaminadmin/Projekte/breakpilot-lehrer
|
||||
# 1. Code auf MacBook bearbeiten (dieses Verzeichnis)
|
||||
# 2. Committen und pushen:
|
||||
git push origin main && git push gitea main
|
||||
|
||||
# Einzelbefehle (BEVORZUGT):
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && <cmd>"
|
||||
# 3. Auf Mac Mini pullen und Container neu bauen:
|
||||
ssh macmini "git -C /Users/benjaminadmin/Projekte/breakpilot-lehrer pull --no-rebase origin main"
|
||||
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml build --no-cache <service>"
|
||||
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml up -d <service>"
|
||||
```
|
||||
|
||||
### SSH-Verbindung (fuer Docker/Tests)
|
||||
|
||||
**WICHTIG:** `cd` in SSH-Kommandos funktioniert NICHT zuverlaessig! Stattdessen:
|
||||
- Git: `git -C /Users/benjaminadmin/Projekte/breakpilot-lehrer <cmd>`
|
||||
- Docker: `/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml <cmd>`
|
||||
- Logs: `/usr/local/bin/docker logs -f bp-lehrer-<service>`
|
||||
|
||||
---
|
||||
|
||||
## Voraussetzung
|
||||
@@ -163,10 +172,10 @@ breakpilot-lehrer/
|
||||
|
||||
```bash
|
||||
# Lehrer-Services starten (Core muss laufen!)
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && /usr/local/bin/docker compose up -d"
|
||||
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml up -d"
|
||||
|
||||
# Einzelnen Service neu bauen
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && /usr/local/bin/docker compose build --no-cache <service>"
|
||||
ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/breakpilot-lehrer/docker-compose.yml build --no-cache <service>"
|
||||
|
||||
# Logs
|
||||
ssh macmini "/usr/local/bin/docker logs -f bp-lehrer-<service>"
|
||||
@@ -176,6 +185,7 @@ ssh macmini "/usr/local/bin/docker ps --filter name=bp-lehrer"
|
||||
```
|
||||
|
||||
**WICHTIG:** Docker-Pfad auf Mac Mini ist `/usr/local/bin/docker` (nicht im Standard-SSH-PATH).
|
||||
**WICHTIG:** Immer `-f` mit vollem Pfad zur docker-compose.yml nutzen, `cd` in SSH funktioniert nicht!
|
||||
|
||||
### Frontend-Entwicklung
|
||||
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
# =========================================================
|
||||
# BreakPilot Lehrer — Coolify Environment Variables
|
||||
# =========================================================
|
||||
# Copy these into Coolify's environment variable UI
|
||||
# for the breakpilot-lehrer Docker Compose resource.
|
||||
# =========================================================
|
||||
|
||||
# --- External PostgreSQL (Coolify-managed, same as Core) ---
|
||||
POSTGRES_HOST=<coolify-postgres-hostname>
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USER=breakpilot
|
||||
POSTGRES_PASSWORD=CHANGE_ME_SAME_AS_CORE
|
||||
POSTGRES_DB=breakpilot_db
|
||||
|
||||
# --- Security ---
|
||||
JWT_SECRET=CHANGE_ME_SAME_AS_CORE
|
||||
|
||||
# --- External S3 Storage (same as Core) ---
|
||||
S3_ENDPOINT=<s3-endpoint-host:port>
|
||||
S3_ACCESS_KEY=CHANGE_ME_SAME_AS_CORE
|
||||
S3_SECRET_KEY=CHANGE_ME_SAME_AS_CORE
|
||||
S3_BUCKET=breakpilot-rag
|
||||
S3_SECURE=true
|
||||
|
||||
# --- External Qdrant (Coolify-managed, same as Core) ---
|
||||
QDRANT_URL=http://<coolify-qdrant-hostname>:6333
|
||||
|
||||
# --- Session ---
|
||||
SESSION_TTL_HOURS=24
|
||||
|
||||
# --- SMTP (Real mail server) ---
|
||||
SMTP_HOST=smtp.example.com
|
||||
SMTP_PORT=587
|
||||
SMTP_USERNAME=noreply@breakpilot.ai
|
||||
SMTP_PASSWORD=CHANGE_ME_SMTP_PASSWORD
|
||||
SMTP_FROM_NAME=BreakPilot
|
||||
SMTP_FROM_ADDR=noreply@breakpilot.ai
|
||||
|
||||
# --- LLM / Ollama (optional) ---
|
||||
OLLAMA_BASE_URL=
|
||||
OLLAMA_URL=
|
||||
OLLAMA_ENABLED=false
|
||||
OLLAMA_DEFAULT_MODEL=
|
||||
OLLAMA_VISION_MODEL=
|
||||
OLLAMA_CORRECTION_MODEL=
|
||||
OLLAMA_TIMEOUT=120
|
||||
|
||||
# --- Anthropic (optional) ---
|
||||
ANTHROPIC_API_KEY=
|
||||
|
||||
# --- vast.ai GPU (optional) ---
|
||||
VAST_API_KEY=
|
||||
VAST_INSTANCE_ID=
|
||||
|
||||
# --- Game Settings ---
|
||||
GAME_USE_DATABASE=true
|
||||
GAME_REQUIRE_AUTH=true
|
||||
GAME_REQUIRE_BILLING=true
|
||||
GAME_LLM_MODEL=
|
||||
|
||||
# --- Frontend URLs (build args) ---
|
||||
NEXT_PUBLIC_API_URL=https://api-lehrer.breakpilot.ai
|
||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL=https://klausur.breakpilot.ai
|
||||
NEXT_PUBLIC_VOICE_SERVICE_URL=wss://voice.breakpilot.ai
|
||||
NEXT_PUBLIC_BILLING_API_URL=https://api-core.breakpilot.ai
|
||||
NEXT_PUBLIC_APP_URL=https://app.breakpilot.ai
|
||||
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=
|
||||
|
||||
# --- Edu Search ---
|
||||
EDU_SEARCH_URL=
|
||||
EDU_SEARCH_API_KEY=
|
||||
OPENSEARCH_PASSWORD=CHANGE_ME_OPENSEARCH_PASSWORD
|
||||
|
||||
# --- Misc ---
|
||||
CONTROL_API_KEY=
|
||||
ALERTS_AGENT_ENABLED=false
|
||||
PADDLEOCR_SERVICE_URL=
|
||||
TROCR_SERVICE_URL=
|
||||
CAMUNDA_URL=
|
||||
17
.env.example
17
.env.example
@@ -30,6 +30,23 @@ OLLAMA_VISION_MODEL=llama3.2-vision
|
||||
OLLAMA_CORRECTION_MODEL=llama3.2
|
||||
OLLAMA_TIMEOUT=120
|
||||
|
||||
# OCR-Pipeline: LLM-Review (Schritt 6)
|
||||
# Kleine Modelle reichen fuer Zeichen-Korrekturen (0->O, 1->l, 5->S)
|
||||
# Optionen: qwen3:0.6b, qwen3:1.7b, gemma3:1b, qwen3.5:35b-a3b
|
||||
OLLAMA_REVIEW_MODEL=qwen3:0.6b
|
||||
# Eintraege pro Ollama-Call. Groesser = weniger HTTP-Overhead.
|
||||
OLLAMA_REVIEW_BATCH_SIZE=20
|
||||
|
||||
# OCR-Pipeline: Engine fuer Schritt 5 (Worterkennung)
|
||||
# Optionen: auto (bevorzugt RapidOCR), rapid, tesseract,
|
||||
# trocr-printed, trocr-handwritten, lighton
|
||||
OCR_ENGINE=auto
|
||||
|
||||
# Klausur-HTR: Primaerem Modell fuer Handschriftenerkennung (qwen2.5vl bereits auf Mac Mini)
|
||||
OLLAMA_HTR_MODEL=qwen2.5vl:32b
|
||||
# HTR Fallback: genutzt wenn Ollama nicht erreichbar (auto-download ~340 MB)
|
||||
HTR_FALLBACK_MODEL=trocr-large
|
||||
|
||||
# Anthropic (optional)
|
||||
ANTHROPIC_API_KEY=
|
||||
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
name: Deploy to Coolify
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- coolify
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Wait for Core deployment
|
||||
run: |
|
||||
echo "Waiting 30s for Core services to stabilize..."
|
||||
sleep 30
|
||||
|
||||
- name: Deploy via Coolify API
|
||||
run: |
|
||||
echo "Deploying breakpilot-lehrer to Coolify..."
|
||||
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-X POST \
|
||||
-H "Authorization: Bearer ${{ secrets.COOLIFY_API_TOKEN }}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"uuid": "${{ secrets.COOLIFY_RESOURCE_UUID }}", "force_rebuild": true}' \
|
||||
"${{ secrets.COOLIFY_BASE_URL }}/api/v1/deploy")
|
||||
|
||||
echo "HTTP Status: $HTTP_STATUS"
|
||||
if [ "$HTTP_STATUS" -ne 200 ] && [ "$HTTP_STATUS" -ne 201 ]; then
|
||||
echo "Deployment failed with status $HTTP_STATUS"
|
||||
exit 1
|
||||
fi
|
||||
echo "Deployment triggered successfully!"
|
||||
@@ -34,8 +34,8 @@ WORKDIR /app
|
||||
ENV NODE_ENV=production
|
||||
|
||||
# Create non-root user
|
||||
RUN addgroup -S -g 1001 nodejs
|
||||
RUN adduser -S -u 1001 -G nodejs nextjs
|
||||
RUN addgroup --system --gid 1001 nodejs
|
||||
RUN adduser --system --uid 1001 nextjs
|
||||
|
||||
# Copy built assets
|
||||
COPY --from=builder /app/public ./public
|
||||
|
||||
@@ -273,52 +273,6 @@ Dein Ziel ist die rechtzeitige Erkennung und Kommunikation relevanter Ereignisse
|
||||
createdAt: '2024-12-01T00:00:00Z',
|
||||
updatedAt: '2025-01-12T02:00:00Z'
|
||||
},
|
||||
'compliance-advisor': {
|
||||
id: 'compliance-advisor',
|
||||
name: 'Compliance Advisor',
|
||||
description: 'DSGVO/Compliance-Berater fuer SDK-Nutzer',
|
||||
soulFile: 'compliance-advisor.soul.md',
|
||||
soulContent: `# Compliance Advisor Agent
|
||||
|
||||
## Identitaet
|
||||
Du bist der BreakPilot Compliance-Berater. Du hilfst Nutzern des AI Compliance SDK,
|
||||
Datenschutz- und Compliance-Fragen in verstaendlicher Sprache zu beantworten.
|
||||
Du bist kein Anwalt und gibst keine Rechtsberatung, sondern orientierst dich an
|
||||
offiziellen Quellen und gibst praxisnahe Hinweise.
|
||||
|
||||
## Kernprinzipien
|
||||
- **Quellenbasiert**: Verweise immer auf konkrete Rechtsgrundlagen (DSGVO-Artikel, BDSG-Paragraphen)
|
||||
- **Verstaendlich**: Erklaere rechtliche Konzepte in einfacher, praxisnaher Sprache
|
||||
- **Ehrlich**: Bei Unsicherheit empfehle professionelle Rechtsberatung
|
||||
- **Kontextbewusst**: Nutze das RAG-System fuer aktuelle Rechtstexte und Leitfaeden
|
||||
- **Scope-bewusst**: Nutze alle verfuegbaren RAG-Quellen AUSSER NIBIS-Dokumenten
|
||||
|
||||
## Kompetenzbereich
|
||||
- DSGVO Art. 1-99 + Erwaegsgruende
|
||||
- BDSG (Bundesdatenschutzgesetz)
|
||||
- AI Act (EU KI-Verordnung)
|
||||
- TTDSG, ePrivacy-Richtlinie
|
||||
- DSK-Kurzpapiere (Nr. 1-20)
|
||||
- SDM V3.0, BSI-Grundschutz, BSI-TR-03161
|
||||
- EDPB Guidelines, Bundes-/Laender-Muss-Listen
|
||||
- ISO 27001/27701 (Ueberblick)
|
||||
|
||||
## Kommunikationsstil
|
||||
- Sachlich, aber verstaendlich
|
||||
- Deutsch als Hauptsprache
|
||||
- Strukturierte Antworten mit Quellenangabe
|
||||
- Praxisbeispiele wo hilfreich`,
|
||||
color: '#6366f1',
|
||||
status: 'running',
|
||||
activeSessions: 0,
|
||||
totalProcessed: 0,
|
||||
avgResponseTime: 0,
|
||||
errorRate: 0,
|
||||
lastRestart: new Date().toISOString(),
|
||||
version: '1.0.0',
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString()
|
||||
},
|
||||
'orchestrator': {
|
||||
id: 'orchestrator',
|
||||
name: 'Orchestrator',
|
||||
|
||||
@@ -94,19 +94,6 @@ const mockAgents: AgentConfig[] = [
|
||||
totalProcessed: 8934,
|
||||
avgResponseTime: 12,
|
||||
lastActivity: 'just now'
|
||||
},
|
||||
{
|
||||
id: 'compliance-advisor',
|
||||
name: 'Compliance Advisor',
|
||||
description: 'DSGVO/Compliance-Berater fuer SDK-Nutzer',
|
||||
soulFile: 'compliance-advisor.soul.md',
|
||||
color: '#6366f1',
|
||||
icon: 'message',
|
||||
status: 'running',
|
||||
activeSessions: 0,
|
||||
totalProcessed: 0,
|
||||
avgResponseTime: 0,
|
||||
lastActivity: new Date().toISOString()
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@@ -179,7 +179,6 @@ export default function GPUInfrastructurePage() {
|
||||
databases: ['PostgreSQL (Logs)'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
|
||||
{ name: 'Test Quality (BQAS)', href: '/ai/test-quality', description: 'Golden Suite & Tests' },
|
||||
{ name: 'Magic Help', href: '/ai/magic-help', description: 'TrOCR Testing' },
|
||||
]}
|
||||
|
||||
@@ -1,503 +0,0 @@
|
||||
'use client'
|
||||
|
||||
/**
|
||||
* LLM Comparison Tool
|
||||
*
|
||||
* Vergleicht Antworten von verschiedenen LLM-Providern:
|
||||
* - OpenAI/ChatGPT
|
||||
* - Claude
|
||||
* - Self-hosted + Tavily
|
||||
* - Self-hosted + EduSearch
|
||||
*/
|
||||
|
||||
import { useState, useEffect, useCallback } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
|
||||
|
||||
interface LLMResponse {
|
||||
provider: string
|
||||
model: string
|
||||
response: string
|
||||
latency_ms: number
|
||||
tokens_used?: number
|
||||
search_results?: Array<{
|
||||
title: string
|
||||
url: string
|
||||
content: string
|
||||
score?: number
|
||||
}>
|
||||
error?: string
|
||||
timestamp: string
|
||||
}
|
||||
|
||||
interface ComparisonResult {
|
||||
comparison_id: string
|
||||
prompt: string
|
||||
system_prompt?: string
|
||||
responses: LLMResponse[]
|
||||
created_at: string
|
||||
}
|
||||
|
||||
const providerColors: Record<string, { bg: string; border: string; text: string }> = {
|
||||
openai: { bg: 'bg-emerald-50', border: 'border-emerald-300', text: 'text-emerald-700' },
|
||||
claude: { bg: 'bg-orange-50', border: 'border-orange-300', text: 'text-orange-700' },
|
||||
selfhosted_tavily: { bg: 'bg-blue-50', border: 'border-blue-300', text: 'text-blue-700' },
|
||||
selfhosted_edusearch: { bg: 'bg-purple-50', border: 'border-purple-300', text: 'text-purple-700' },
|
||||
}
|
||||
|
||||
const providerLabels: Record<string, string> = {
|
||||
openai: 'OpenAI GPT-4o-mini',
|
||||
claude: 'Claude 3.5 Sonnet',
|
||||
selfhosted_tavily: 'Self-hosted + Tavily',
|
||||
selfhosted_edusearch: 'Self-hosted + EduSearch',
|
||||
}
|
||||
|
||||
export default function LLMComparePage() {
|
||||
// State
|
||||
const [prompt, setPrompt] = useState('')
|
||||
const [systemPrompt, setSystemPrompt] = useState('Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland.')
|
||||
|
||||
// Provider toggles
|
||||
const [enableOpenAI, setEnableOpenAI] = useState(true)
|
||||
const [enableClaude, setEnableClaude] = useState(true)
|
||||
const [enableTavily, setEnableTavily] = useState(true)
|
||||
const [enableEduSearch, setEnableEduSearch] = useState(true)
|
||||
|
||||
// Parameters
|
||||
const [model, setModel] = useState('llama3.2:3b')
|
||||
const [temperature, setTemperature] = useState(0.7)
|
||||
const [maxTokens, setMaxTokens] = useState(2048)
|
||||
|
||||
// Results
|
||||
const [isLoading, setIsLoading] = useState(false)
|
||||
const [result, setResult] = useState<ComparisonResult | null>(null)
|
||||
const [history, setHistory] = useState<ComparisonResult[]>([])
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
|
||||
// UI State
|
||||
const [showSettings, setShowSettings] = useState(false)
|
||||
const [showHistory, setShowHistory] = useState(false)
|
||||
|
||||
// API Base URL
|
||||
const API_URL = process.env.NEXT_PUBLIC_LLM_GATEWAY_URL || 'http://localhost:8082'
|
||||
const API_KEY = process.env.NEXT_PUBLIC_LLM_API_KEY || 'dev-key'
|
||||
|
||||
// Load history
|
||||
const loadHistory = useCallback(async () => {
|
||||
try {
|
||||
const response = await fetch(`${API_URL}/v1/comparison/history?limit=20`, {
|
||||
headers: { Authorization: `Bearer ${API_KEY}` },
|
||||
})
|
||||
if (response.ok) {
|
||||
const data = await response.json()
|
||||
setHistory(data.comparisons || [])
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load history:', e)
|
||||
}
|
||||
}, [API_URL, API_KEY])
|
||||
|
||||
useEffect(() => {
|
||||
loadHistory()
|
||||
}, [loadHistory])
|
||||
|
||||
const runComparison = async () => {
|
||||
if (!prompt.trim()) {
|
||||
setError('Bitte geben Sie einen Prompt ein')
|
||||
return
|
||||
}
|
||||
|
||||
setIsLoading(true)
|
||||
setError(null)
|
||||
setResult(null)
|
||||
|
||||
try {
|
||||
const response = await fetch(`${API_URL}/v1/comparison/run`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${API_KEY}`,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
prompt,
|
||||
system_prompt: systemPrompt || undefined,
|
||||
enable_openai: enableOpenAI,
|
||||
enable_claude: enableClaude,
|
||||
enable_selfhosted_tavily: enableTavily,
|
||||
enable_selfhosted_edusearch: enableEduSearch,
|
||||
selfhosted_model: model,
|
||||
temperature,
|
||||
max_tokens: maxTokens,
|
||||
}),
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`API Error: ${response.status}`)
|
||||
}
|
||||
|
||||
const data = await response.json()
|
||||
setResult(data)
|
||||
loadHistory()
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setIsLoading(false)
|
||||
}
|
||||
}
|
||||
|
||||
const ResponseCard = ({ response }: { response: LLMResponse }) => {
|
||||
const colors = providerColors[response.provider] || {
|
||||
bg: 'bg-slate-50',
|
||||
border: 'border-slate-300',
|
||||
text: 'text-slate-700',
|
||||
}
|
||||
const label = providerLabels[response.provider] || response.provider
|
||||
|
||||
return (
|
||||
<div className={`rounded-xl border-2 ${colors.border} ${colors.bg} overflow-hidden`}>
|
||||
<div className={`px-4 py-3 border-b ${colors.border} flex items-center justify-between`}>
|
||||
<div>
|
||||
<h3 className={`font-semibold ${colors.text}`}>{label}</h3>
|
||||
<p className="text-xs text-slate-500">{response.model}</p>
|
||||
</div>
|
||||
<div className="text-right text-xs text-slate-500">
|
||||
<div>{response.latency_ms}ms</div>
|
||||
{response.tokens_used && <div>{response.tokens_used} tokens</div>}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="p-4">
|
||||
{response.error ? (
|
||||
<div className="text-red-600 text-sm">
|
||||
<strong>Fehler:</strong> {response.error}
|
||||
</div>
|
||||
) : (
|
||||
<pre className="whitespace-pre-wrap text-sm text-slate-700 font-sans">
|
||||
{response.response}
|
||||
</pre>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{response.search_results && response.search_results.length > 0 && (
|
||||
<div className="px-4 pb-4">
|
||||
<details className="text-xs">
|
||||
<summary className="cursor-pointer text-slate-500 hover:text-slate-700">
|
||||
{response.search_results.length} Suchergebnisse anzeigen
|
||||
</summary>
|
||||
<ul className="mt-2 space-y-2">
|
||||
{response.search_results.map((sr, idx) => (
|
||||
<li key={idx} className="bg-white rounded p-2 border border-slate-200">
|
||||
<a
|
||||
href={sr.url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-blue-600 hover:underline font-medium"
|
||||
>
|
||||
{sr.title || 'Untitled'}
|
||||
</a>
|
||||
<p className="text-slate-500 truncate">{sr.content}</p>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</details>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div>
|
||||
{/* Page Purpose */}
|
||||
<PagePurpose
|
||||
title="LLM Vergleich"
|
||||
purpose="Vergleichen Sie Antworten verschiedener KI-Provider (OpenAI, Claude, Self-hosted) fuer Qualitaetssicherung. Optimieren Sie Parameter und System Prompts fuer beste Ergebnisse. Standalone-Werkzeug ohne direkten Datenfluss zur KI-Pipeline."
|
||||
audience={['Entwickler', 'Data Scientists', 'QA']}
|
||||
architecture={{
|
||||
services: ['llm-gateway (Python)', 'Ollama', 'OpenAI API', 'Claude API'],
|
||||
databases: ['PostgreSQL (History)', 'Qdrant (RAG)'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'Test Quality (BQAS)', href: '/ai/test-quality', description: 'Golden Suite & Synthetic Tests' },
|
||||
{ name: 'GPU Infrastruktur', href: '/ai/gpu', description: 'GPU-Ressourcen verwalten' },
|
||||
{ name: 'Agent Management', href: '/ai/agents', description: 'Multi-Agent System' },
|
||||
]}
|
||||
collapsible={true}
|
||||
defaultCollapsed={true}
|
||||
/>
|
||||
|
||||
{/* KI-Werkzeuge Sidebar */}
|
||||
<AIToolsSidebarResponsive currentTool="llm-compare" />
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
|
||||
{/* Left Column: Input & Settings */}
|
||||
<div className="lg:col-span-1 space-y-4">
|
||||
{/* Prompt Input */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-4">
|
||||
<h2 className="font-semibold text-slate-900 mb-3">Prompt</h2>
|
||||
|
||||
{/* System Prompt */}
|
||||
<div className="mb-3">
|
||||
<label className="block text-sm text-slate-600 mb-1">System Prompt</label>
|
||||
<textarea
|
||||
value={systemPrompt}
|
||||
onChange={(e) => setSystemPrompt(e.target.value)}
|
||||
rows={3}
|
||||
className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm resize-none"
|
||||
placeholder="System Prompt (optional)"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* User Prompt */}
|
||||
<div className="mb-3">
|
||||
<label className="block text-sm text-slate-600 mb-1">User Prompt</label>
|
||||
<textarea
|
||||
value={prompt}
|
||||
onChange={(e) => setPrompt(e.target.value)}
|
||||
rows={4}
|
||||
className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm resize-none"
|
||||
placeholder="z.B.: Erstelle ein Arbeitsblatt zum Thema Bruchrechnung fuer Klasse 6..."
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Provider Toggles */}
|
||||
<div className="mb-4">
|
||||
<label className="block text-sm text-slate-600 mb-2">Provider</label>
|
||||
<div className="grid grid-cols-2 gap-2">
|
||||
<label className="flex items-center gap-2 text-sm">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={enableOpenAI}
|
||||
onChange={(e) => setEnableOpenAI(e.target.checked)}
|
||||
className="rounded"
|
||||
/>
|
||||
OpenAI
|
||||
</label>
|
||||
<label className="flex items-center gap-2 text-sm">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={enableClaude}
|
||||
onChange={(e) => setEnableClaude(e.target.checked)}
|
||||
className="rounded"
|
||||
/>
|
||||
Claude
|
||||
</label>
|
||||
<label className="flex items-center gap-2 text-sm">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={enableTavily}
|
||||
onChange={(e) => setEnableTavily(e.target.checked)}
|
||||
className="rounded"
|
||||
/>
|
||||
Self + Tavily
|
||||
</label>
|
||||
<label className="flex items-center gap-2 text-sm">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={enableEduSearch}
|
||||
onChange={(e) => setEnableEduSearch(e.target.checked)}
|
||||
className="rounded"
|
||||
/>
|
||||
Self + EduSearch
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Run Button */}
|
||||
<button
|
||||
onClick={runComparison}
|
||||
disabled={isLoading || !prompt.trim()}
|
||||
className="w-full py-3 bg-teal-600 text-white rounded-lg font-medium hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{isLoading ? (
|
||||
<span className="flex items-center justify-center gap-2">
|
||||
<svg className="animate-spin w-5 h-5" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
|
||||
</svg>
|
||||
Vergleiche...
|
||||
</span>
|
||||
) : (
|
||||
'Vergleich starten'
|
||||
)}
|
||||
</button>
|
||||
|
||||
{error && (
|
||||
<div className="mt-3 p-3 bg-red-50 border border-red-200 rounded-lg text-red-700 text-sm">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Settings Panel */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
||||
<button
|
||||
onClick={() => setShowSettings(!showSettings)}
|
||||
className="w-full px-4 py-3 flex items-center justify-between hover:bg-slate-50"
|
||||
>
|
||||
<span className="font-semibold text-slate-900">Parameter</span>
|
||||
<svg
|
||||
className={`w-5 h-5 transition-transform ${showSettings ? 'rotate-180' : ''}`}
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
viewBox="0 0 24 24"
|
||||
>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
|
||||
</svg>
|
||||
</button>
|
||||
|
||||
{showSettings && (
|
||||
<div className="p-4 border-t border-slate-200 space-y-4">
|
||||
<div>
|
||||
<label className="block text-sm text-slate-600 mb-1">Self-hosted Modell</label>
|
||||
<select
|
||||
value={model}
|
||||
onChange={(e) => setModel(e.target.value)}
|
||||
className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm"
|
||||
>
|
||||
<option value="llama3.2:3b">Llama 3.2 3B</option>
|
||||
<option value="llama3.1:8b">Llama 3.1 8B</option>
|
||||
<option value="mistral:7b">Mistral 7B</option>
|
||||
<option value="qwen2.5:7b">Qwen 2.5 7B</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-sm text-slate-600 mb-1">
|
||||
Temperature: {temperature.toFixed(2)}
|
||||
</label>
|
||||
<input
|
||||
type="range"
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.1"
|
||||
value={temperature}
|
||||
onChange={(e) => setTemperature(parseFloat(e.target.value))}
|
||||
className="w-full"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-sm text-slate-600 mb-1">Max Tokens: {maxTokens}</label>
|
||||
<input
|
||||
type="range"
|
||||
min="256"
|
||||
max="4096"
|
||||
step="256"
|
||||
value={maxTokens}
|
||||
onChange={(e) => setMaxTokens(parseInt(e.target.value))}
|
||||
className="w-full"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* History Panel */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
||||
<button
|
||||
onClick={() => setShowHistory(!showHistory)}
|
||||
className="w-full px-4 py-3 flex items-center justify-between hover:bg-slate-50"
|
||||
>
|
||||
<span className="font-semibold text-slate-900">Verlauf ({history.length})</span>
|
||||
<svg
|
||||
className={`w-5 h-5 transition-transform ${showHistory ? 'rotate-180' : ''}`}
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
viewBox="0 0 24 24"
|
||||
>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
|
||||
</svg>
|
||||
</button>
|
||||
|
||||
{showHistory && history.length > 0 && (
|
||||
<div className="border-t border-slate-200 max-h-64 overflow-y-auto">
|
||||
{history.map((h) => (
|
||||
<button
|
||||
key={h.comparison_id}
|
||||
onClick={() => {
|
||||
setResult(h)
|
||||
setPrompt(h.prompt)
|
||||
if (h.system_prompt) setSystemPrompt(h.system_prompt)
|
||||
}}
|
||||
className="w-full px-4 py-2 text-left hover:bg-slate-50 border-b border-slate-100 last:border-0"
|
||||
>
|
||||
<div className="text-sm text-slate-700 truncate">{h.prompt}</div>
|
||||
<div className="text-xs text-slate-400">
|
||||
{new Date(h.created_at).toLocaleString('de-DE')}
|
||||
</div>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right Column: Results */}
|
||||
<div className="lg:col-span-2">
|
||||
{result ? (
|
||||
<div className="space-y-4">
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h2 className="font-semibold text-slate-900">Ergebnisse</h2>
|
||||
<p className="text-sm text-slate-500">ID: {result.comparison_id}</p>
|
||||
</div>
|
||||
<div className="text-sm text-slate-500">
|
||||
{new Date(result.created_at).toLocaleString('de-DE')}
|
||||
</div>
|
||||
</div>
|
||||
<div className="mt-2 p-3 bg-slate-50 rounded-lg">
|
||||
<p className="text-sm text-slate-700">{result.prompt}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-1 xl:grid-cols-2 gap-4">
|
||||
{result.responses.map((response, idx) => (
|
||||
<ResponseCard key={`${response.provider}-${idx}`} response={response} />
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-12 text-center">
|
||||
<svg
|
||||
className="w-16 h-16 mx-auto text-slate-300 mb-4"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
viewBox="0 0 24 24"
|
||||
>
|
||||
<path
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
strokeWidth={1.5}
|
||||
d="M9 3v2m6-2v2M9 19v2m6-2v2M5 9H3m2 6H3m18-6h-2m2 6h-2M7 19h10a2 2 0 002-2V7a2 2 0 00-2-2H7a2 2 0 00-2 2v10a2 2 0 002 2zM9 9h6v6H9V9z"
|
||||
/>
|
||||
</svg>
|
||||
<h3 className="text-lg font-medium text-slate-700 mb-2">LLM-Vergleich starten</h3>
|
||||
<p className="text-slate-500 max-w-md mx-auto">
|
||||
Geben Sie einen Prompt ein und klicken Sie auf "Vergleich starten", um
|
||||
die Antworten verschiedener LLM-Provider zu vergleichen.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Info Box */}
|
||||
<div className="mt-8 bg-teal-50 border border-teal-200 rounded-xl p-6">
|
||||
<div className="flex items-start gap-4">
|
||||
<svg className="w-6 h-6 text-teal-600 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<div>
|
||||
<h3 className="font-semibold text-teal-900">Qualitaetssicherung</h3>
|
||||
<p className="text-sm text-teal-800 mt-1">
|
||||
Dieses Tool dient zur Qualitaetssicherung der KI-Antworten. Vergleichen Sie verschiedene Provider,
|
||||
um die optimalen Parameter und System Prompts zu finden. Die Ergebnisse werden fuer Audits gespeichert.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -685,7 +685,6 @@ export default function OCRComparePage() {
|
||||
databases: ['PostgreSQL (Sessions)'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider vergleichen' },
|
||||
{ name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Ground Truth erstellen' },
|
||||
]}
|
||||
collapsible={true}
|
||||
|
||||
635
admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
Normal file
635
admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
Normal file
@@ -0,0 +1,635 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
||||
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
|
||||
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
|
||||
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
|
||||
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
||||
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||
import { GridEditor } from '@/components/grid-editor/GridEditor'
|
||||
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
export default function OcrOverlayPage() {
|
||||
// --- Pipeline navigation --------------------------------------------------
// Which of the three step sequences is shown, and the index of the step being rendered.
const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline')
const [currentStep, setCurrentStep] = useState(0)

// --- Active session and session list --------------------------------------
const [sessionId, setSessionId] = useState<string | null>(null)
const [sessionName, setSessionName] = useState<string>('')
const [sessions, setSessions] = useState<SessionListItem[]>([])
const [loadingSessions, setLoadingSessions] = useState(true)

// --- Inline editing in the session list -----------------------------------
// editingName / editingCategory hold the id of the row being edited (null = none).
const [editingName, setEditingName] = useState<string | null>(null)
const [editNameValue, setEditNameValue] = useState('')
const [editingCategory, setEditingCategory] = useState<string | null>(null)

// --- Category of the active session ---------------------------------------
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
const [editingActiveCategory, setEditingActiveCategory] = useState(false)

// --- Ground-truth marking --------------------------------------------------
const [isGroundTruth, setIsGroundTruth] = useState(false)
const [gtSaving, setGtSaving] = useState(false)
const [gtMessage, setGtMessage] = useState('')

// Stepper model. Starts as the overlay pipeline with only the first step active;
// the initializer function keeps the array from being rebuilt on every render.
const [steps, setSteps] = useState<PipelineStep[]>(() =>
  OVERLAY_PIPELINE_STEPS.map((step, idx) => ({
    ...step,
    status: idx === 0 ? 'active' : 'pending',
  })),
)

// Load the session list once, when the component mounts (empty dependency list).
useEffect(() => {
  loadSessions()
}, [])
|
||||
|
||||
/**
 * Fetch the session list and keep only top-level sessions, i.e. entries
 * without a `parent_session_id`.
 *
 * A non-OK response leaves the current list untouched; a thrown error is
 * logged to the console. The loading flag is set for the duration of the
 * request and always cleared afterwards.
 */
const loadSessions = async () => {
  setLoadingSessions(true)
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
    if (!response.ok) return

    const payload = await response.json()
    const allSessions: SessionListItem[] = payload.sessions || []
    // Sub-sessions are hidden from this list.
    setSessions(allSessions.filter((entry) => !entry.parent_session_id))
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoadingSessions(false)
  }
}
|
||||
|
||||
/**
 * Open an existing session: fetch its details, adopt its name, category and
 * ground-truth flag, then select the matching mode and resume step.
 *
 * The mode is derived from `word_result.ocr_engine`:
 *   - 'kombi' / 'rapid_kombi' -> kombi mode
 *   - 'paddle_direct'         -> paddle-direct mode
 *   - anything else           -> classic pipeline, resumed at the UI step
 *                                mapped from the stored `current_step`
 *
 * A non-OK response is ignored silently; thrown errors are logged.
 */
const openSession = useCallback(async (sid: string) => {
  // Build a stepper model from a template: steps before `active` are completed,
  // `active` itself is active, everything after is pending.
  const withActiveStep = (template: PipelineStep[], active: number): PipelineStep[] =>
    template.map((step, idx) => ({
      ...step,
      status: idx < active ? 'completed' : idx === active ? 'active' : 'pending',
    }))

  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
    if (!res.ok) return
    const session = await res.json()

    setSessionId(sid)
    setSessionName(session.name || session.filename || '')
    setActiveCategory(session.document_category || undefined)
    setIsGroundTruth(!!session.ground_truth?.build_grid_reference)
    setGtMessage('')

    const engine = session.word_result?.ocr_engine
    const usedKombi = engine === 'kombi' || engine === 'rapid_kombi'

    if (!usedKombi && engine !== 'paddle_direct') {
      // Classic pipeline: translate the stored DB step into the overlay UI index.
      setMode('pipeline')
      const uiStep = dbStepToOverlayUi(session.current_step || 1)
      setSteps(withActiveStep(OVERLAY_PIPELINE_STEPS, uiStep))
      setCurrentStep(uiStep)
      return
    }

    setMode(usedKombi ? 'kombi' : 'paddle-direct')

    // Resume point: paddle-direct always reopens on step 4. Kombi reopens on the
    // grid editor (6) when a grid or structure result exists, otherwise on the
    // structure step (5) when a word result exists.
    let resumeAt = 4
    if (usedKombi) {
      if (session.grid_editor_result || session.structure_result) {
        resumeAt = 6
      } else if (session.word_result) {
        resumeAt = 5
      }
    }

    setSteps(withActiveStep(usedKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS, resumeAt))
    setCurrentStep(resumeAt)
  } catch (e) {
    console.error('Failed to open session:', e)
  }
}, [])
|
||||
|
||||
/**
 * Delete a session on the server and, once the server has confirmed it,
 * remove it from the local list. If the deleted session was the active one,
 * the stepper is reset to the first step of the current mode.
 *
 * `fetch` only rejects on network failures, so the HTTP status is checked
 * explicitly: without that check a refused DELETE would still make the
 * session vanish from the UI until the next reload. A 404 is treated as
 * success because the session is gone either way.
 *
 * @param sid id of the session to delete
 */
const deleteSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
    if (!res.ok && res.status !== 404) {
      console.error('Failed to delete session:', res.status)
      return
    }

    setSessions((prev) => prev.filter((s) => s.id !== sid))
    if (sessionId === sid) {
      setSessionId(null)
      setCurrentStep(0)
      const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
      setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    }
  } catch (e) {
    console.error('Failed to delete session:', e)
  }
}, [sessionId, mode])
|
||||
|
||||
/**
 * Rename a session on the server and mirror the new name locally.
 *
 * The local list (and the active-session label, if it is the same session)
 * is only updated when the server answered with a 2xx status; `fetch` does
 * not reject on HTTP errors, so a rejected PUT previously still changed the
 * name shown in the UI. Inline edit mode is left in every case.
 *
 * @param sid     id of the session to rename
 * @param newName name to store
 */
const renameSession = useCallback(async (sid: string, newName: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: newName }),
    })
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
      if (sessionId === sid) setSessionName(newName)
    } else {
      console.error('Failed to rename session:', res.status)
    }
  } catch (e) {
    console.error('Failed to rename session:', e)
  }
  // Close the inline editor whether or not the rename succeeded.
  setEditingName(null)
}, [sessionId])
|
||||
|
||||
/**
 * Store a document category for a session on the server and mirror it locally.
 *
 * Local state (list entry and, for the active session, `activeCategory`) is
 * only changed after a 2xx response; `fetch` does not reject on HTTP errors,
 * so the status has to be checked explicitly. The category dropdown of the
 * session list is closed in every case.
 *
 * @param sid      id of the session to update
 * @param category category value to store
 */
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ document_category: category }),
    })
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
      if (sessionId === sid) setActiveCategory(category)
    } else {
      console.error('Failed to update category:', res.status)
    }
  } catch (e) {
    console.error('Failed to update category:', e)
  }
  // Close the dropdown whether or not the update succeeded.
  setEditingCategory(null)
}, [sessionId])
|
||||
|
||||
/**
 * Stepper click handler. A step can be selected when it is at or before the
 * current step, or when it is already marked as completed; other clicks are
 * ignored.
 */
const handleStepClick = (index: number) => {
  const alreadyReached = index <= currentStep
  if (!alreadyReached && steps[index].status !== 'completed') return
  setCurrentStep(index)
}
|
||||
|
||||
/**
 * Jump to `step` and rewrite every step status relative to it:
 * earlier steps become completed, the target becomes active, later steps pending.
 */
const goToStep = (step: number) => {
  const statusFor = (idx: number): PipelineStep['status'] => {
    if (idx < step) return 'completed'
    return idx === step ? 'active' : 'pending'
  }

  setCurrentStep(step)
  setSteps((prev) => prev.map((entry, idx) => ({ ...entry, status: statusFor(idx) })))
}
|
||||
|
||||
/**
 * Advance the stepper by one step.
 *
 * While there is a following step, the current one is marked completed and the
 * next one active. On the last step the flow is finished instead: the stepper
 * is reset to the first step of the current mode, the active session is
 * cleared and the session list is reloaded.
 */
const handleNext = () => {
  const lastIndex = steps.length - 1

  if (currentStep < lastIndex) {
    const upcoming = currentStep + 1
    setSteps((prev) =>
      prev.map((step, idx) =>
        idx === currentStep
          ? { ...step, status: 'completed' }
          : idx === upcoming
            ? { ...step, status: 'active' }
            : step,
      ),
    )
    setCurrentStep(upcoming)
    return
  }

  // Last step completed — return to session list
  const templates = {
    'kombi': KOMBI_STEPS,
    'paddle-direct': PADDLE_DIRECT_STEPS,
    'pipeline': OVERLAY_PIPELINE_STEPS,
  }
  setSteps(templates[mode].map((step, idx) => ({ ...step, status: idx === 0 ? 'active' : 'pending' })))
  setCurrentStep(0)
  setSessionId(null)
  loadSessions()
}
|
||||
|
||||
/**
 * Completion callback of the orientation step. It receives a session id,
 * makes that session the active one, refreshes the session list and moves on
 * to the next step.
 */
const handleOrientationComplete = (sid: string) => {
  setSessionId(sid)
  // Refresh the list; the request runs in the background while the stepper advances.
  loadSessions()
  handleNext()
}
|
||||
|
||||
/**
 * Start over with no active session: clears the session id and name and
 * resets the stepper to the first step of the currently selected mode.
 */
const handleNewSession = () => {
  let template = OVERLAY_PIPELINE_STEPS
  if (mode === 'kombi') {
    template = KOMBI_STEPS
  } else if (mode === 'paddle-direct') {
    template = PADDLE_DIRECT_STEPS
  }

  setSessionId(null)
  setSessionName('')
  setCurrentStep(0)
  setSteps(template.map((step, idx) => ({ ...step, status: idx === 0 ? 'active' : 'pending' })))
}
|
||||
|
||||
// Display names for the confirmation dialog, keyed by 1-based step number.
const stepNames: Record<number, string> = {
  1: 'Orientierung',
  2: 'Begradigung',
  3: 'Entzerrung',
  4: 'Zuschneiden',
  5: 'Zeilen',
  6: 'Woerter',
  7: 'Overlay',
}

/**
 * Ask the server to reprocess the active session starting at the given UI
 * step, after the user confirmed the action. On success the stepper jumps
 * back to that step; failures are only logged to the console.
 *
 * NOTE(review): the UI-to-DB mapping below is a local copy and differs from
 * `OVERLAY_UI_TO_DB` in ./types (there 4 -> 6 and 5 -> 7, here 4 -> 7 and
 * 5 -> 8). Confirm which numbering the reprocess endpoint expects.
 *
 * @param uiStep 0-based index of the overlay UI step to restart from
 */
const reprocessFromStep = useCallback(async (uiStep: number) => {
  if (!sessionId) return

  // Map overlay UI step to DB step; unknown indices fall back to uiStep + 1.
  const uiToDb: Record<number, number> = { 0: 2, 1: 3, 2: 4, 3: 5, 4: 7, 5: 8, 6: 9 }
  const fromStep = uiToDb[uiStep] || uiStep + 1

  const humanStep = uiStep + 1
  const confirmed = confirm(`Ab Schritt ${humanStep} (${stepNames[humanStep] || '?'}) neu verarbeiten?`)
  if (!confirmed) return

  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ from_step: fromStep }),
    })
    if (res.ok) {
      goToStep(uiStep)
      return
    }
    // The error body may not be JSON; fall back to the bare status code.
    const failure = await res.json().catch(() => ({}))
    console.error('Reprocess failed:', failure.detail || res.status)
  } catch (e) {
    console.error('Reprocess error:', e)
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId, goToStep])
|
||||
|
||||
/**
 * Mark the active session as ground truth for the current mode.
 *
 * Does nothing without an active session. While the request runs the saving
 * flag is set. On success the session is flagged as ground truth and a
 * confirmation with the number of saved cells is shown for five seconds; on
 * failure the error text is shown instead and stays until it is replaced.
 */
const handleMarkGroundTruth = async () => {
  if (!sessionId) return

  setGtSaving(true)
  setGtMessage('')

  const url = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth?pipeline=${mode}`
  try {
    const resp = await fetch(url, { method: 'POST' })
    if (!resp.ok) {
      // Include the response body in the message when it can be read.
      const body = await resp.text().catch(() => '')
      throw new Error(`Ground Truth fehlgeschlagen (${resp.status}): ${body}`)
    }

    const saved = await resp.json()
    setIsGroundTruth(true)
    setGtMessage(`Ground Truth gespeichert (${saved.cells_saved} Zellen)`)
    setTimeout(() => setGtMessage(''), 5000)
  } catch (e) {
    const text = e instanceof Error ? e.message : String(e)
    setGtMessage(text)
  } finally {
    setGtSaving(false)
  }
}
|
||||
|
||||
// The ground-truth bar is offered only while a session is open on the final step.
const isLastStep = steps.length - 1 === currentStep
const showGtButton = sessionId != null && isLastStep
|
||||
|
||||
/**
 * Return the component for the current step of the current mode, or null
 * when the mode has no step at that index.
 *
 * Steps 0-3 (orientation, deskew, dewarp, crop) are the same in every mode;
 * from step 4 on the modes differ:
 *   - pipeline:      4 rows, 5 words, 6 overlay reconstruction
 *   - paddle-direct: 4 Paddle direct step
 *   - kombi:         4 Paddle step against the kombi endpoint, 5 structure
 *                    detection, 6 grid editor
 */
const renderStep = () => {
  // Shared preprocessing steps.
  switch (currentStep) {
    case 0:
      return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
    case 1:
      return <StepDeskew sessionId={sessionId} onNext={handleNext} />
    case 2:
      return <StepDewarp sessionId={sessionId} onNext={handleNext} />
    case 3:
      return <StepCrop sessionId={sessionId} onNext={handleNext} />
  }

  if (mode === 'pipeline') {
    if (currentStep === 4) {
      return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
    }
    if (currentStep === 5) {
      return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} skipHealGaps />
    }
    if (currentStep === 6) {
      return <OverlayReconstruction sessionId={sessionId} onNext={handleNext} />
    }
    return null
  }

  if (mode === 'paddle-direct') {
    return currentStep === 4 ? <PaddleDirectStep sessionId={sessionId} onNext={handleNext} /> : null
  }

  // Remaining mode: kombi.
  switch (currentStep) {
    case 4:
      return (
        <PaddleDirectStep
          sessionId={sessionId}
          onNext={handleNext}
          endpoint="paddle-kombi"
          title="Kombi-Modus"
          description="PP-OCRv5 und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
          icon="🔀"
          buttonLabel="PP-OCRv5 + Tesseract starten"
          runningLabel="PP-OCRv5 + Tesseract laufen..."
          engineKey="kombi"
        />
      )
    case 5:
      return <StepStructureDetection sessionId={sessionId} onNext={handleNext} />
    case 6:
      return <GridEditor sessionId={sessionId} onNext={handleNext} />
    default:
      return null
  }
}
|
||||
|
||||
// One entry per mode button. Selecting a mode other than the current one resets
// the stepper to the first step of that mode's template and clears the active session id.
const modeOptions: { value: typeof mode; label: string; template: PipelineStep[] }[] = [
  { value: 'pipeline', label: 'Pipeline (7 Schritte)', template: OVERLAY_PIPELINE_STEPS },
  { value: 'paddle-direct', label: 'PP-OCRv5 Direct (5 Schritte)', template: PADDLE_DIRECT_STEPS },
  { value: 'kombi', label: 'Kombi (7 Schritte)', template: KOMBI_STEPS },
]

return (
  <div className="space-y-6">
    <PagePurpose
      title="OCR Overlay"
      purpose="Ganzseitige Overlay-Rekonstruktion: Scan begradigen, Zeilen und Woerter erkennen, dann pixelgenau ueber das Bild legen. Ohne Spaltenerkennung — ideal fuer Arbeitsblaetter."
      audience={['Entwickler']}
      architecture={{
        services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
        databases: ['PostgreSQL Sessions'],
      }}
      relatedPages={[
        { name: 'OCR Pipeline', href: '/ai/ocr-pipeline', description: 'Volle Pipeline mit Spalten' },
        { name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
      ]}
      defaultCollapsed
    />

    {/* Session List */}
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
      <div className="flex items-center justify-between mb-3">
        <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
          Sessions ({sessions.length})
        </h3>
        <button
          onClick={handleNewSession}
          className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
        >
          + Neue Session
        </button>
      </div>

      {loadingSessions ? (
        <div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
      ) : sessions.length === 0 ? (
        <div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
      ) : (
        <div className="space-y-1.5 max-h-[320px] overflow-y-auto">
          {sessions.map((s) => {
            const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
            return (
              <div
                key={s.id}
                className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
                  sessionId === s.id
                    ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
                    : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
                }`}
              >
                {/* Thumbnail */}
                <div
                  className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
                  onClick={() => openSession(s.id)}
                >
                  {/* eslint-disable-next-line @next/next/no-img-element */}
                  <img
                    src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
                    alt=""
                    className="w-full h-full object-cover"
                    loading="lazy"
                    onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
                  />
                </div>

                {/* Info */}
                <div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
                  {editingName === s.id ? (
                    <input
                      autoFocus
                      value={editNameValue}
                      onChange={(e) => setEditNameValue(e.target.value)}
                      onBlur={() => renameSession(s.id, editNameValue)}
                      onKeyDown={(e) => {
                        if (e.key === 'Enter') renameSession(s.id, editNameValue)
                        if (e.key === 'Escape') setEditingName(null)
                      }}
                      onClick={(e) => e.stopPropagation()}
                      className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
                    />
                  ) : (
                    <div className="truncate font-medium text-gray-700 dark:text-gray-300">
                      {s.name || s.filename}
                    </div>
                  )}
                  <button
                    onClick={(e) => {
                      e.stopPropagation()
                      // Keep a reference to the button for the asynchronous callbacks below.
                      const btn = e.currentTarget
                      // navigator.clipboard is undefined outside secure contexts, so the
                      // call is guarded; the "copied" label appears only after the write
                      // succeeded, and a rejected write is logged instead of left unhandled.
                      navigator.clipboard
                        ?.writeText(s.id)
                        .then(() => {
                          btn.textContent = 'Kopiert!'
                          setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
                        })
                        .catch((err) => console.error('Failed to copy session id:', err))
                    }}
                    className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
                    title={`Volle ID: ${s.id} — Klick zum Kopieren`}
                  >
                    ID: {s.id.slice(0, 8)}
                  </button>
                  <div className="text-xs text-gray-400 flex gap-2 mt-0.5">
                    <span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
                  </div>
                </div>

                {/* Category Badge */}
                <div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
                  <button
                    onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
                    className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
                      catInfo
                        ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
                        : 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
                    }`}
                    title="Kategorie setzen"
                  >
                    {catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
                  </button>
                </div>

                {/* Actions */}
                <div className="flex flex-col gap-0.5 flex-shrink-0">
                  <button
                    onClick={(e) => {
                      e.stopPropagation()
                      setEditNameValue(s.name || s.filename)
                      setEditingName(s.id)
                    }}
                    className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
                    title="Umbenennen"
                  >
                    <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
                      <path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
                    </svg>
                  </button>
                  <button
                    onClick={(e) => {
                      e.stopPropagation()
                      if (confirm('Session loeschen?')) deleteSession(s.id)
                    }}
                    className="p-1 text-gray-400 hover:text-red-500"
                    title="Loeschen"
                  >
                    <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
                      <path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
                    </svg>
                  </button>
                </div>

                {/* Category dropdown */}
                {editingCategory === s.id && (
                  <div
                    className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
                    onClick={(e) => e.stopPropagation()}
                  >
                    {DOCUMENT_CATEGORIES.map((cat) => (
                      <button
                        key={cat.value}
                        onClick={() => updateCategory(s.id, cat.value)}
                        className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
                          s.document_category === cat.value
                            ? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
                            : 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
                        }`}
                      >
                        {cat.icon} {cat.label}
                      </button>
                    ))}
                  </div>
                )}
              </div>
            )
          })}
        </div>
      )}
    </div>

    {/* Active session info + category picker */}
    {sessionId && sessionName && (
      <div className="relative flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
        <span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
        <button
          onClick={() => setEditingActiveCategory(!editingActiveCategory)}
          className={`text-xs px-2.5 py-1 rounded-full border transition-colors ${
            activeCategory
              ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300 hover:bg-teal-100 dark:hover:bg-teal-900/50'
              : 'bg-amber-50 dark:bg-amber-900/20 border-amber-300 dark:border-amber-700 text-amber-700 dark:text-amber-300 hover:bg-amber-100 dark:hover:bg-amber-900/40 animate-pulse'
          }`}
        >
          {activeCategory ? (() => {
            const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
            return cat ? `${cat.icon} ${cat.label}` : activeCategory
          })() : 'Kategorie setzen'}
        </button>
        {isGroundTruth && (
          <span className="text-xs px-2 py-0.5 rounded-full bg-amber-50 dark:bg-amber-900/20 border border-amber-300 dark:border-amber-700 text-amber-700 dark:text-amber-300">
            GT
          </span>
        )}
        {editingActiveCategory && (
          <div className="absolute left-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64">
            {DOCUMENT_CATEGORIES.map((cat) => (
              <button
                key={cat.value}
                onClick={() => {
                  updateCategory(sessionId, cat.value)
                  setEditingActiveCategory(false)
                }}
                className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
                  activeCategory === cat.value
                    ? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
                    : 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
                }`}
              >
                {cat.icon} {cat.label}
              </button>
            ))}
          </div>
        )}
      </div>
    )}

    {/* Mode Toggle */}
    <div className="flex items-center gap-1 bg-gray-100 dark:bg-gray-800 rounded-lg p-1 w-fit">
      {modeOptions.map((option) => (
        <button
          key={option.value}
          onClick={() => {
            // Clicking the mode that is already selected changes nothing.
            if (mode === option.value) return
            setMode(option.value)
            setCurrentStep(0)
            setSessionId(null)
            setSteps(option.template.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
          }}
          className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
            mode === option.value
              ? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
              : 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
          }`}
        >
          {option.label}
        </button>
      ))}
    </div>

    {/* Reprocessing from a step is only offered in pipeline mode with an open session. */}
    <PipelineStepper
      steps={steps}
      currentStep={currentStep}
      onStepClick={handleStepClick}
      onReprocess={mode === 'pipeline' && sessionId != null ? reprocessFromStep : undefined}
    />

    <div className="min-h-[400px]">{renderStep()}</div>

    {/* Ground Truth button bar — visible on last step */}
    {showGtButton && (
      <div className="sticky bottom-0 bg-white dark:bg-gray-900 border-t dark:border-gray-700 py-3 px-4 -mx-1 flex items-center justify-between rounded-b-xl">
        <div className="text-sm text-gray-500 dark:text-gray-400">
          {gtMessage && (
            <span className={gtMessage.includes('fehlgeschlagen') ? 'text-red-500' : 'text-amber-600 dark:text-amber-400'}>
              {gtMessage}
            </span>
          )}
        </div>
        <button
          onClick={handleMarkGroundTruth}
          disabled={gtSaving}
          className="px-4 py-2 text-sm bg-amber-600 text-white rounded hover:bg-amber-700 disabled:opacity-50"
        >
          {gtSaving ? 'Speichere...' : isGroundTruth ? 'Ground Truth aktualisieren' : 'Als Ground Truth markieren'}
        </button>
      </div>
    )}
  </div>
)
|
||||
}
|
||||
87
admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts
Normal file
87
admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts
Normal file
@@ -0,0 +1,87 @@
|
||||
import type { PipelineStep } from '../ocr-pipeline/types'
|
||||
|
||||
// Re-export types used by overlay components
|
||||
export type {
|
||||
PipelineStep,
|
||||
PipelineStepStatus,
|
||||
SessionListItem,
|
||||
SessionInfo,
|
||||
DocumentCategory,
|
||||
DocumentTypeResult,
|
||||
OrientationResult,
|
||||
CropResult,
|
||||
DeskewResult,
|
||||
DewarpResult,
|
||||
RowResult,
|
||||
RowItem,
|
||||
GridResult,
|
||||
GridCell,
|
||||
OcrWordBox,
|
||||
WordBbox,
|
||||
ColumnMeta,
|
||||
} from '../ocr-pipeline/types'
|
||||
|
||||
export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types'
|
||||
|
||||
/**
 * Step list for the full-page overlay reconstruction flow (seven entries).
 * Compared with the complete OCR pipeline it leaves out column detection (Spalten),
 * the LLM review (Korrektur) and the ground-truth step (Validierung).
 * Every entry starts with status 'pending'.
 */
export const OVERLAY_PIPELINE_STEPS: PipelineStep[] = (
  [
    // [id, display name, icon]
    ['orientation', 'Orientierung', '🔄'],
    ['deskew', 'Begradigung', '📐'],
    ['dewarp', 'Entzerrung', '🔧'],
    ['crop', 'Zuschneiden', '✂️'],
    ['rows', 'Zeilen', '📏'],
    ['words', 'Woerter', '🔤'],
    ['reconstruction', 'Overlay', '🏗️'],
  ] as Array<[string, string, string]>
).map(([id, name, icon]): PipelineStep => ({ id, name, icon, status: 'pending' }))
|
||||
|
||||
/**
 * Lookup from overlay UI step index (0-based) to DB step number (1-based).
 * The array below lists the DB step for UI index 0, 1, 2, ... in order.
 *
 * NOTE(review): dbStepToOverlayUi in this file documents DB 7 = rows and DB 8 = words,
 * whereas this table sends UI 4 (rows) to 6 and UI 5 (words) to 7. Confirm which
 * numbering the backend expects before relying on a round trip between the two.
 */
export const OVERLAY_UI_TO_DB: Record<number, number> = [
  2, // orientation
  3, // deskew
  4, // dewarp
  5, // crop
  6, // rows
  7, // words
  9, // reconstruction
].reduce<Record<number, number>>((uiToDb, dbStep, uiIndex) => {
  uiToDb[uiIndex] = dbStep
  return uiToDb
}, {})
|
||||
|
||||
/**
 * Step list for Paddle Direct mode (five entries).
 * The first four steps are the shared preprocessing (orientation, deskew, dewarp, crop);
 * the single 'paddle-direct' step stands in for the rows, words and overlay steps.
 * Every entry starts with status 'pending'.
 */
export const PADDLE_DIRECT_STEPS: PipelineStep[] = (
  [
    // [id, display name, icon]
    ['orientation', 'Orientierung', '🔄'],
    ['deskew', 'Begradigung', '📐'],
    ['dewarp', 'Entzerrung', '🔧'],
    ['crop', 'Zuschneiden', '✂️'],
    ['paddle-direct', 'PP-OCRv5 + Overlay', '⚡'],
  ] as Array<[string, string, string]>
).map(([id, name, icon]): PipelineStep => ({ id, name, icon, status: 'pending' }))
|
||||
|
||||
/**
 * 7-step pipeline for Kombi mode (PP-OCRv5 + Tesseract).
 * Same preprocessing (orientation, deskew, dewarp, crop), then both engines run and
 * their results are merged ('kombi'), followed by structure detection ('structure')
 * and the table editor ('grid-editor').
 * Every entry starts with status 'pending'.
 */
export const KOMBI_STEPS: PipelineStep[] = [
  { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
  { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
  { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
  { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
  { id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: '🔀', status: 'pending' },
  { id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' },
  { id: 'grid-editor', name: 'Tabelle', icon: '📊', status: 'pending' },
]
|
||||
|
||||
/**
 * Translate a DB step number into the index of the overlay UI step to show.
 *
 * DB numbering used here: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop,
 * 6=columns, 7=rows, 8=words, 9=recon, 10=gt.
 *
 * @param dbStep step number as stored for the session
 * @returns overlay UI index in the range 0..6
 */
export function dbStepToOverlayUi(dbStep: number): number {
  // Anything up to and including "orient" opens on the first overlay step.
  if (dbStep <= 2) return 0

  switch (dbStep) {
    case 3:
      return 1 // deskew
    case 4:
      return 2 // dewarp
    case 5:
      return 3 // crop
    case 8:
      return 5 // words
  }

  // Columns has no overlay step of its own, so 6 and 7 both open "rows";
  // everything from 9 upwards opens the reconstruction step.
  return dbStep <= 7 ? 4 : 6
}
|
||||
624
admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
Normal file
624
admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
Normal file
@@ -0,0 +1,624 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
||||
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
|
||||
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
|
||||
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
|
||||
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
||||
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
|
||||
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
||||
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
||||
import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs'
|
||||
import { PIPELINE_STEPS, DOCUMENT_CATEGORIES, type PipelineStep, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
export default function OcrPipelinePage() {
|
||||
const [currentStep, setCurrentStep] = useState(0)
|
||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||
const [sessionName, setSessionName] = useState<string>('')
|
||||
const [sessions, setSessions] = useState<SessionListItem[]>([])
|
||||
const [loadingSessions, setLoadingSessions] = useState(true)
|
||||
const [editingName, setEditingName] = useState<string | null>(null)
|
||||
const [editNameValue, setEditNameValue] = useState('')
|
||||
const [editingCategory, setEditingCategory] = useState<string | null>(null)
|
||||
const [docTypeResult, setDocTypeResult] = useState<DocumentTypeResult | null>(null)
|
||||
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
|
||||
const [subSessions, setSubSessions] = useState<SubSession[]>([])
|
||||
const [parentSessionId, setParentSessionId] = useState<string | null>(null)
|
||||
const [steps, setSteps] = useState<PipelineStep[]>(
|
||||
PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i === 0 ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
|
||||
// Load session list on mount
|
||||
useEffect(() => {
|
||||
loadSessions()
|
||||
}, [])
|
||||
|
||||
// Fetch the session list from the backend and store it in `sessions`.
// The loading flag is raised for the duration of the request and always
// lowered again, whether the request succeeded, failed or threw.
const loadSessions = async () => {
  setLoadingSessions(true)
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
    // Non-2xx answers leave the current list untouched.
    if (!response.ok) return
    const payload = await response.json()
    setSessions(payload.sessions || [])
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoadingSessions(false)
  }
}
|
||||
|
||||
// Load one session from the backend and restore the page state for it:
// name, category, sub-session tabs, document type result, step statuses
// and the step to display.
//
// sid             - id of the session to open
// keepSubSessions - when true, the current sub-session list and parent id are
//                   kept even if the loaded session reports neither sub-sessions
//                   nor a parent (used when switching between box tabs)
//
// A non-2xx response is ignored silently; thrown errors are logged.
const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
    if (!res.ok) return
    const data = await res.json()

    setSessionId(sid)
    setSessionName(data.name || data.filename || '')
    setActiveCategory(data.document_category || undefined)

    // Sub-session handling
    if (data.sub_sessions && data.sub_sessions.length > 0) {
      setSubSessions(data.sub_sessions)
      setParentSessionId(sid)
    } else if (data.parent_session_id) {
      // This is a sub-session — keep parent info but don't reset sub-session list
      setParentSessionId(data.parent_session_id)
    } else if (!keepSubSessions) {
      setSubSessions([])
      setParentSessionId(null)
    }

    // Restore doc type result if available
    const savedDocType: DocumentTypeResult | null = data.doc_type_result || null
    setDocTypeResult(savedDocType)

    // Determine which step to jump to based on current_step.
    // The stored step is 1-based while the stepper is 0-based, hence the "- 1".
    // The result is clamped to the available steps: an out-of-range current_step
    // would otherwise leave the page without any step to render.
    const dbStep = data.current_step || 1
    const lastUiStep = PIPELINE_STEPS.length - 1
    let uiStep = Math.max(0, Math.min(lastUiStep, dbStep - 1))
    const skipSteps = [...(savedDocType?.skip_steps || [])]

    // Sub-sessions: image is already cropped, skip pre-processing steps
    // Jump directly to columns (UI step 4) unless already further ahead
    const isSubSession = !!data.parent_session_id
    const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop']
    if (isSubSession) {
      for (const s of SUB_SESSION_SKIP) {
        if (!skipSteps.includes(s)) skipSteps.push(s)
      }
      if (uiStep < 4) uiStep = 4 // columns step (index 4)
    }

    setSteps(
      PIPELINE_STEPS.map((s, i) => ({
        ...s,
        status: skipSteps.includes(s.id)
          ? 'skipped'
          : i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
      })),
    )
    setCurrentStep(uiStep)
  } catch (e) {
    console.error('Failed to open session:', e)
  }
}, [])
|
||||
|
||||
// Delete one session on the backend and, only if the backend confirmed it,
// drop it from the local list. When the deleted session is the one currently
// open, the stepper is reset to its initial state.
//
// fetch() resolves for HTTP error statuses as well, so the response status is
// checked explicitly; otherwise a rejected delete would still vanish from the UI.
const deleteSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
    if (!res.ok) {
      console.error('Failed to delete session:', res.status)
      return
    }
    setSessions((prev) => prev.filter((s) => s.id !== sid))
    if (sessionId === sid) {
      setSessionId(null)
      setCurrentStep(0)
      setDocTypeResult(null)
      setSubSessions([])
      setParentSessionId(null)
      setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    }
  } catch (e) {
    console.error('Failed to delete session:', e)
  }
}, [sessionId])
|
||||
|
||||
// Persist a new display name for a session and mirror it in local state.
// The inline name editor is closed afterwards in every case.
//
// fetch() resolves for HTTP error statuses as well, so the local list and the
// active-session label are only updated when the backend accepted the change.
const renameSession = useCallback(async (sid: string, newName: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: newName }),
    })
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
      if (sessionId === sid) setSessionName(newName)
    } else {
      console.error('Failed to rename session:', res.status)
    }
  } catch (e) {
    console.error('Failed to rename session:', e)
  }
  setEditingName(null)
}, [sessionId])
|
||||
|
||||
// Persist the document category of a session and mirror it in local state.
// The category dropdown is closed afterwards in every case.
//
// fetch() resolves for HTTP error statuses as well, so the badge in the list
// and the active-session badge are only updated when the backend accepted it.
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ document_category: category }),
    })
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
      if (sessionId === sid) setActiveCategory(category)
    } else {
      console.error('Failed to update category:', res.status)
    }
  } catch (e) {
    console.error('Failed to update category:', e)
  }
  setEditingCategory(null)
}, [sessionId])
|
||||
|
||||
// Ask for confirmation, delete every session on the backend and then reset
// the page to its initial, session-less state.
//
// fetch() resolves for HTTP error statuses as well, so local state is only
// cleared once the backend confirmed the bulk delete.
const deleteAllSessions = useCallback(async () => {
  if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' })
    if (!res.ok) {
      console.error('Failed to delete all sessions:', res.status)
      return
    }
    setSessions([])
    setSessionId(null)
    setCurrentStep(0)
    setDocTypeResult(null)
    setActiveCategory(undefined)
    setSubSessions([])
    setParentSessionId(null)
    setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
  } catch (e) {
    console.error('Failed to delete all sessions:', e)
  }
}, [])
|
||||
|
||||
// Stepper click handler: a step can be opened when it is the current step,
// lies before it, or has already been completed. Other clicks are ignored.
const handleStepClick = (index: number) => {
  const reachable = index <= currentStep || steps[index].status === 'completed'
  if (!reachable) return
  setCurrentStep(index)
}
|
||||
|
||||
// Jump to the given step index and rebuild the stepper statuses around it:
// earlier steps become 'completed', the target becomes 'active', later steps
// become 'pending'.
//
// Steps marked 'skipped' keep that status unless they are the jump target.
// The skip list in docTypeResult is not cleared by this jump and handleNext
// keeps skipping those steps, so the stepper must not show them as
// completed or pending.
const goToStep = (step: number) => {
  setCurrentStep(step)
  setSteps((prev) =>
    prev.map((s, i) => {
      if (i === step) return { ...s, status: 'active' }
      if (s.status === 'skipped') return s
      return { ...s, status: i < step ? 'completed' : 'pending' }
    }),
  )
}
|
||||
|
||||
// Advance the pipeline by one step.
//
// On the last step:
//   - inside a sub-session, mark that sub-session as completed in the tab list
//     and switch back to the parent session;
//   - otherwise reset the page and reload the session list.
// On any other step: move to the next step that is not listed in
// docTypeResult.skip_steps, marking the current step 'completed', the new one
// 'active' and the skipped ones in between 'skipped'.
const handleNext = () => {
  const lastIndex = steps.length - 1

  if (currentStep >= lastIndex) {
    if (parentSessionId && sessionId !== parentSessionId) {
      // Sub-session finished: update its tab entry, then return to the parent.
      setSubSessions((prev) =>
        prev.map((sub) => (sub.id === sessionId ? { ...sub, status: 'completed', current_step: 10 } : sub)),
      )
      handleSessionChange(parentSessionId)
      return
    }

    // Main session finished: back to the session list.
    setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    setCurrentStep(0)
    setSessionId(null)
    setSubSessions([])
    setParentSessionId(null)
    loadSessions()
    return
  }

  const skipIds = docTypeResult?.skip_steps || []
  const isSkipped = (i: number) => skipIds.includes(PIPELINE_STEPS[i]?.id)

  // Walk forward past skipped steps; never run beyond the last step.
  let candidate = currentStep + 1
  while (candidate < steps.length && isSkipped(candidate)) {
    candidate += 1
  }
  const target = Math.min(candidate, lastIndex)

  setSteps((prev) =>
    prev.map((s, i) => {
      if (i === currentStep) return { ...s, status: 'completed' }
      if (i === target) return { ...s, status: 'active' }
      // Steps jumped over on the way to the target are shown as skipped.
      const jumpedOver = i > currentStep && i < target
      return jumpedOver && isSkipped(i) ? { ...s, status: 'skipped' } : s
    }),
  )
  setCurrentStep(target)
}
|
||||
|
||||
const handleOrientationComplete = (sid: string) => {
|
||||
setSessionId(sid)
|
||||
// Reload session list to show the new session
|
||||
loadSessions()
|
||||
handleNext()
|
||||
}
|
||||
|
||||
// "Next" handler of the crop step: auto-detect the document type (crop is the
// last image-processing step), then advance to the next step that is not skipped.
//
// The advance is computed here from the freshly received skip list rather than
// by calling handleNext(): handleNext reads docTypeResult from the render in
// which this handler was created, so it would not yet see the result stored by
// setDocTypeResult and would activate a step that has just been marked skipped.
// Detection failures are not critical; the pipeline continues with the skip
// list that was already known.
const handleCropNext = async () => {
  let skipSteps: string[] = docTypeResult?.skip_steps || []

  if (sessionId) {
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-type`,
        { method: 'POST' },
      )
      if (res.ok) {
        const data: DocumentTypeResult = await res.json()
        setDocTypeResult(data)
        skipSteps = data.skip_steps || []
      }
    } catch (e) {
      console.error('Doc type detection failed:', e)
      // Not critical — continue without it
    }
  }

  // End-of-pipeline handling stays with handleNext.
  if (currentStep >= steps.length - 1) {
    handleNext()
    return
  }

  // Find the next non-skipped step, never running beyond the last step.
  let nextStep = currentStep + 1
  while (nextStep < steps.length && skipSteps.includes(PIPELINE_STEPS[nextStep]?.id)) {
    nextStep++
  }
  if (nextStep >= steps.length) nextStep = steps.length - 1

  setSteps((prev) =>
    prev.map((s, i) => {
      if (i === currentStep) return { ...s, status: 'completed' }
      if (i === nextStep) return { ...s, status: 'active' }
      // Mark skipped steps immediately
      if (skipSteps.includes(s.id)) return { ...s, status: 'skipped' }
      return s
    }),
  )
  setCurrentStep(nextStep)
}
|
||||
|
||||
// Manual override of the detected document type.
// Does nothing while no detection result exists. Otherwise the stored result
// is replaced by a copy carrying the new type together with the skip list and
// pipeline that belong to it, and the stepper statuses are brought in line.
const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => {
  if (!docTypeResult) return

  // Only full_text skips steps; vocab_table and generic_table skip nothing.
  const isFullText = newDocType === 'full_text'
  const skipSteps: string[] = isFullText ? ['columns', 'rows'] : []

  const updated: DocumentTypeResult = {
    ...docTypeResult,
    doc_type: newDocType,
    skip_steps: skipSteps,
    pipeline: isFullText ? 'full_page' : 'cell_first',
  }
  setDocTypeResult(updated)

  setSteps((prev) =>
    prev.map((s) => {
      if (skipSteps.includes(s.id)) {
        return { ...s, status: 'skipped' as const }
      }
      // Steps that were skipped under the previous type become available again.
      return s.status === 'skipped' ? { ...s, status: 'pending' as const } : s
    }),
  )
}
|
||||
|
||||
const handleNewSession = () => {
|
||||
setSessionId(null)
|
||||
setSessionName('')
|
||||
setCurrentStep(0)
|
||||
setDocTypeResult(null)
|
||||
setSubSessions([])
|
||||
setParentSessionId(null)
|
||||
setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}
|
||||
|
||||
const handleSessionChange = useCallback((newSessionId: string) => {
|
||||
openSession(newSessionId, true)
|
||||
}, [openSession])
|
||||
|
||||
const handleBoxSessionsCreated = useCallback((subs: SubSession[]) => {
|
||||
setSubSessions(subs)
|
||||
if (sessionId) setParentSessionId(sessionId)
|
||||
}, [sessionId])
|
||||
|
||||
const stepNames: Record<number, string> = {
|
||||
1: 'Orientierung',
|
||||
2: 'Begradigung',
|
||||
3: 'Entzerrung',
|
||||
4: 'Zuschneiden',
|
||||
5: 'Spalten',
|
||||
6: 'Zeilen',
|
||||
7: 'Woerter',
|
||||
8: 'Struktur',
|
||||
9: 'Korrektur',
|
||||
10: 'Rekonstruktion',
|
||||
11: 'Validierung',
|
||||
}
|
||||
|
||||
const reprocessFromStep = useCallback(async (uiStep: number) => {
|
||||
if (!sessionId) return
|
||||
const dbStep = uiStep + 1 // UI is 0-indexed, DB is 1-indexed
|
||||
if (!confirm(`Ab Schritt ${dbStep} (${stepNames[dbStep] || '?'}) neu verarbeiten? Nachfolgende Daten werden geloescht.`)) return
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ from_step: dbStep }),
|
||||
})
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}))
|
||||
console.error('Reprocess failed:', data.detail || res.status)
|
||||
return
|
||||
}
|
||||
// Reset UI steps
|
||||
goToStep(uiStep)
|
||||
} catch (e) {
|
||||
console.error('Reprocess error:', e)
|
||||
}
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId, goToStep])
|
||||
|
||||
const renderStep = () => {
|
||||
switch (currentStep) {
|
||||
case 0:
|
||||
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
||||
case 1:
|
||||
return <StepDeskew sessionId={sessionId} onNext={handleNext} />
|
||||
case 2:
|
||||
return <StepDewarp sessionId={sessionId} onNext={handleNext} />
|
||||
case 3:
|
||||
return <StepCrop sessionId={sessionId} onNext={handleCropNext} />
|
||||
case 4:
|
||||
return <StepColumnDetection sessionId={sessionId} onNext={handleNext} onBoxSessionsCreated={handleBoxSessionsCreated} />
|
||||
case 5:
|
||||
return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
|
||||
case 6:
|
||||
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
|
||||
case 7:
|
||||
return <StepStructureDetection sessionId={sessionId} onNext={handleNext} />
|
||||
case 8:
|
||||
return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
|
||||
case 9:
|
||||
return <StepReconstruction sessionId={sessionId} onNext={handleNext} />
|
||||
case 10:
|
||||
return <StepGroundTruth sessionId={sessionId} onNext={handleNext} />
|
||||
default:
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<PagePurpose
|
||||
title="OCR Pipeline"
|
||||
purpose="Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. Ziel: 10 Vokabelseiten fehlerfrei rekonstruieren."
|
||||
audience={['Entwickler', 'Data Scientists']}
|
||||
architecture={{
|
||||
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
|
||||
databases: ['PostgreSQL Sessions'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
|
||||
{ name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Trainingsdaten' },
|
||||
]}
|
||||
defaultCollapsed
|
||||
/>
|
||||
|
||||
{/* Session List */}
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
|
||||
<div className="flex items-center justify-between mb-3">
|
||||
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Sessions ({sessions.length})
|
||||
</h3>
|
||||
<div className="flex gap-2">
|
||||
{sessions.length > 0 && (
|
||||
<button
|
||||
onClick={deleteAllSessions}
|
||||
className="text-xs px-3 py-1.5 text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-colors"
|
||||
title="Alle Sessions loeschen"
|
||||
>
|
||||
Alle loeschen
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
onClick={handleNewSession}
|
||||
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
|
||||
>
|
||||
+ Neue Session
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{loadingSessions ? (
|
||||
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
|
||||
) : sessions.length === 0 ? (
|
||||
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
|
||||
) : (
|
||||
<div className="space-y-1.5 max-h-[320px] overflow-y-auto">
|
||||
{sessions.map((s) => {
|
||||
const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
|
||||
return (
|
||||
<div
|
||||
key={s.id}
|
||||
className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
|
||||
sessionId === s.id
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
{/* Thumbnail */}
|
||||
<div
|
||||
className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
|
||||
onClick={() => openSession(s.id)}
|
||||
>
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
|
||||
alt=""
|
||||
className="w-full h-full object-cover"
|
||||
loading="lazy"
|
||||
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Info */}
|
||||
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
|
||||
{editingName === s.id ? (
|
||||
<input
|
||||
autoFocus
|
||||
value={editNameValue}
|
||||
onChange={(e) => setEditNameValue(e.target.value)}
|
||||
onBlur={() => renameSession(s.id, editNameValue)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Enter') renameSession(s.id, editNameValue)
|
||||
if (e.key === 'Escape') setEditingName(null)
|
||||
}}
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
|
||||
/>
|
||||
) : (
|
||||
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
|
||||
{s.name || s.filename}
|
||||
</div>
|
||||
)}
|
||||
{/* ID row */}
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
navigator.clipboard.writeText(s.id)
|
||||
const btn = e.currentTarget
|
||||
btn.textContent = 'Kopiert!'
|
||||
setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
|
||||
}}
|
||||
className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
|
||||
title={`Volle ID: ${s.id} — Klick zum Kopieren`}
|
||||
>
|
||||
ID: {s.id.slice(0, 8)}
|
||||
</button>
|
||||
<div className="text-xs text-gray-400 flex gap-2 mt-0.5">
|
||||
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
|
||||
<span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Badges */}
|
||||
<div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
|
||||
{/* Category Badge */}
|
||||
<button
|
||||
onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
|
||||
className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
|
||||
catInfo
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
|
||||
: 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
|
||||
}`}
|
||||
title="Kategorie setzen"
|
||||
>
|
||||
{catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
|
||||
</button>
|
||||
{/* Doc Type Badge (read-only) */}
|
||||
{s.doc_type && (
|
||||
<span className="text-[10px] px-1.5 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
|
||||
{s.doc_type}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Action buttons */}
|
||||
<div className="flex flex-col gap-0.5 flex-shrink-0">
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
setEditNameValue(s.name || s.filename)
|
||||
setEditingName(s.id)
|
||||
}}
|
||||
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
|
||||
title="Umbenennen"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
if (confirm('Session loeschen?')) deleteSession(s.id)
|
||||
}}
|
||||
className="p-1 text-gray-400 hover:text-red-500"
|
||||
title="Loeschen"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Category dropdown (inline) */}
|
||||
{editingCategory === s.id && (
|
||||
<div
|
||||
className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
{DOCUMENT_CATEGORIES.map((cat) => (
|
||||
<button
|
||||
key={cat.value}
|
||||
onClick={() => updateCategory(s.id, cat.value)}
|
||||
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
|
||||
s.document_category === cat.value
|
||||
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
|
||||
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}
|
||||
>
|
||||
{cat.icon} {cat.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Active session info */}
|
||||
{sessionId && sessionName && (
|
||||
<div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
|
||||
<span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
|
||||
{activeCategory && (() => {
|
||||
const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
|
||||
return cat ? <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{cat.icon} {cat.label}</span> : null
|
||||
})()}
|
||||
{docTypeResult && (
|
||||
<span className="text-xs px-2 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
|
||||
{docTypeResult.doc_type}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<PipelineStepper
|
||||
steps={steps}
|
||||
currentStep={currentStep}
|
||||
onStepClick={handleStepClick}
|
||||
onReprocess={sessionId ? reprocessFromStep : undefined}
|
||||
docTypeResult={docTypeResult}
|
||||
onDocTypeChange={handleDocTypeChange}
|
||||
/>
|
||||
|
||||
{subSessions.length > 0 && parentSessionId && sessionId && (
|
||||
<BoxSessionTabs
|
||||
parentSessionId={parentSessionId}
|
||||
subSessions={subSessions}
|
||||
activeSessionId={sessionId}
|
||||
onSessionChange={handleSessionChange}
|
||||
/>
|
||||
)}
|
||||
|
||||
<div className="min-h-[400px]">{renderStep()}</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
412
admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
Normal file
412
admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
Normal file
@@ -0,0 +1,412 @@
|
||||
export type PipelineStepStatus = 'pending' | 'active' | 'completed' | 'failed' | 'skipped'
|
||||
|
||||
export interface PipelineStep {
|
||||
id: string
|
||||
name: string
|
||||
icon: string
|
||||
status: PipelineStepStatus
|
||||
}
|
||||
|
||||
export type DocumentCategory =
|
||||
| 'vokabelseite' | 'buchseite' | 'arbeitsblatt' | 'klausurseite'
|
||||
| 'mathearbeit' | 'statistik' | 'zeitung' | 'formular' | 'handschrift' | 'sonstiges'
|
||||
|
||||
export const DOCUMENT_CATEGORIES: { value: DocumentCategory; label: string; icon: string }[] = [
|
||||
{ value: 'vokabelseite', label: 'Vokabelseite', icon: '📖' },
|
||||
{ value: 'buchseite', label: 'Buchseite', icon: '📚' },
|
||||
{ value: 'arbeitsblatt', label: 'Arbeitsblatt', icon: '📝' },
|
||||
{ value: 'klausurseite', label: 'Klausurseite', icon: '📄' },
|
||||
{ value: 'mathearbeit', label: 'Mathearbeit', icon: '🔢' },
|
||||
{ value: 'statistik', label: 'Statistik', icon: '📊' },
|
||||
{ value: 'zeitung', label: 'Zeitung', icon: '📰' },
|
||||
{ value: 'formular', label: 'Formular', icon: '📋' },
|
||||
{ value: 'handschrift', label: 'Handschrift', icon: '✍️' },
|
||||
{ value: 'sonstiges', label: 'Sonstiges', icon: '📎' },
|
||||
]
|
||||
|
||||
export interface SessionListItem {
|
||||
id: string
|
||||
name: string
|
||||
filename: string
|
||||
status: string
|
||||
current_step: number
|
||||
document_category?: DocumentCategory
|
||||
doc_type?: string
|
||||
created_at: string
|
||||
updated_at?: string
|
||||
parent_session_id?: string | null
|
||||
box_index?: number | null
|
||||
}
|
||||
|
||||
export interface SubSession {
|
||||
id: string
|
||||
name: string
|
||||
box_index: number
|
||||
current_step?: number
|
||||
status?: string
|
||||
}
|
||||
|
||||
export interface PipelineLogEntry {
|
||||
step: string
|
||||
completed_at: string
|
||||
success: boolean
|
||||
duration_ms?: number
|
||||
metrics: Record<string, unknown>
|
||||
}
|
||||
|
||||
export interface PipelineLog {
|
||||
steps: PipelineLogEntry[]
|
||||
}
|
||||
|
||||
export interface DocumentTypeResult {
|
||||
doc_type: 'vocab_table' | 'full_text' | 'generic_table'
|
||||
confidence: number
|
||||
pipeline: 'cell_first' | 'full_page'
|
||||
skip_steps: string[]
|
||||
features?: Record<string, unknown>
|
||||
duration_seconds?: number
|
||||
}
|
||||
|
||||
export interface OrientationResult {
|
||||
orientation_degrees: number
|
||||
corrected: boolean
|
||||
duration_seconds: number
|
||||
}
|
||||
|
||||
export interface CropResult {
|
||||
crop_applied: boolean
|
||||
crop_rect?: { x: number; y: number; width: number; height: number }
|
||||
crop_rect_pct?: { x: number; y: number; width: number; height: number }
|
||||
original_size: { width: number; height: number }
|
||||
cropped_size: { width: number; height: number }
|
||||
detected_format?: string
|
||||
format_confidence?: number
|
||||
aspect_ratio?: number
|
||||
border_fractions?: { top: number; bottom: number; left: number; right: number }
|
||||
skipped?: boolean
|
||||
duration_seconds?: number
|
||||
}
|
||||
|
||||
export interface SessionInfo {
|
||||
session_id: string
|
||||
filename: string
|
||||
name?: string
|
||||
image_width: number
|
||||
image_height: number
|
||||
original_image_url: string
|
||||
current_step?: number
|
||||
document_category?: DocumentCategory
|
||||
doc_type?: string
|
||||
orientation_result?: OrientationResult
|
||||
crop_result?: CropResult
|
||||
deskew_result?: DeskewResult
|
||||
dewarp_result?: DewarpResult
|
||||
column_result?: ColumnResult
|
||||
row_result?: RowResult
|
||||
word_result?: GridResult
|
||||
doc_type_result?: DocumentTypeResult
|
||||
sub_sessions?: SubSession[]
|
||||
parent_session_id?: string
|
||||
box_index?: number
|
||||
}
|
||||
|
||||
/** Deskew payload: the candidate angles, the one applied, and the resulting image URLs. */
export interface DeskewResult {
  session_id: string;
  angle_hough: number;
  angle_word_alignment: number;
  angle_iterative?: number;
  angle_residual?: number;
  angle_textline?: number;
  angle_applied: number;
  method_used:
    | 'hough'
    | 'word_alignment'
    | 'manual'
    | 'iterative'
    | 'two_pass'
    | 'three_pass'
    | 'manual_combined';
  confidence: number;
  duration_seconds: number;
  deskewed_image_url: string;
  binarized_image_url: string;
}
|
||||
|
||||
/** Reviewer verdict for a deskew result, with an optional corrected angle and notes. */
export interface DeskewGroundTruth {
  is_correct: boolean;
  corrected_angle?: number;
  notes?: string;
}
|
||||
|
||||
/** One detection entry as listed in `DewarpResult.detections`. */
export interface DewarpDetection {
  method: string;
  shear_degrees: number;
  confidence: number;
}
|
||||
|
||||
/** Dewarp payload: the method used, the shear value, and the resulting image URL. */
export interface DewarpResult {
  session_id: string;
  method_used: string;
  shear_degrees: number;
  confidence: number;
  duration_seconds: number;
  dewarped_image_url: string;
  detections?: Array<DewarpDetection>;
}
|
||||
|
||||
/** Reviewer verdict for a dewarp result, with an optional corrected shear and notes. */
export interface DewarpGroundTruth {
  is_correct: boolean;
  corrected_shear?: number;
  notes?: string;
}
|
||||
|
||||
/** A typed rectangular page region; `ColumnResult.columns` is a list of these. */
export interface PageRegion {
  type:
    | 'column_en'
    | 'column_de'
    | 'column_example'
    | 'page_ref'
    | 'column_marker'
    | 'column_text'
    | 'column_ignore'
    | 'header'
    | 'footer';
  x: number;
  y: number;
  width: number;
  height: number;
  classification_confidence?: number;
  classification_method?: string;
}
|
||||
|
||||
/** A vertical span of the page (`y_start`..`y_end`), optionally carrying a box rectangle. */
export interface PageZone {
  zone_type: 'content' | 'box';
  y_start: number;
  y_end: number;
  box?: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
}
|
||||
|
||||
/** Column payload (stored on `SessionInfo.column_result`). */
export interface ColumnResult {
  columns: Array<PageRegion>;
  duration_seconds: number;
  zones?: Array<PageZone>;
}
|
||||
|
||||
/** Reviewer verdict for a column result, with optional corrected columns and notes. */
export interface ColumnGroundTruth {
  is_correct: boolean;
  corrected_columns?: Array<PageRegion>;
  notes?: string;
}
|
||||
|
||||
/** A manually placed column divider. */
export interface ManualColumnDivider {
  /** Position in % of image width (0-100). */
  xPercent: number;
}
|
||||
|
||||
/** Alias for the union of literal values accepted by `PageRegion.type`. */
export type ColumnTypeKey = PageRegion['type'];
|
||||
|
||||
/** Row payload (stored on `SessionInfo.row_result`). */
export interface RowResult {
  rows: Array<RowItem>;
  summary: Record<string, number>;
  total_rows: number;
  duration_seconds: number;
}
|
||||
|
||||
/** One row rectangle with its index, word count and row type. */
export interface RowItem {
  index: number;
  x: number;
  y: number;
  width: number;
  height: number;
  word_count: number;
  row_type:
    | 'content'
    | 'header'
    | 'footer';
  gap_before: number;
}
|
||||
|
||||
/** Reviewer verdict for a row result, with optional corrected rows and notes. */
export interface RowGroundTruth {
  is_correct: boolean;
  corrected_rows?: Array<RowItem>;
  notes?: string;
}
|
||||
|
||||
/** A graphic element; `StructureResult.graphics` is a list of these. */
export interface StructureGraphic {
  x: number;
  y: number;
  w: number;
  h: number;
  area: number;
  /** image, illustration */
  shape: string;
  color_name: string;
  color_hex: string;
  confidence: number;
}
|
||||
|
||||
/** A rectangle with an optional label; `StructureResult.exclude_regions` is a list of these. */
export interface ExcludeRegion {
  x: number;
  y: number;
  w: number;
  h: number;
  label?: string;
}
|
||||
|
||||
/** Structure payload: content bounds, boxes, zones, graphics and word statistics. */
export interface StructureResult {
  image_width: number;
  image_height: number;
  content_bounds: {
    x: number;
    y: number;
    w: number;
    h: number;
  };
  boxes: Array<StructureBox>;
  zones: Array<StructureZone>;
  graphics: Array<StructureGraphic>;
  exclude_regions?: Array<ExcludeRegion>;
  color_pixel_counts: Record<string, number>;
  has_words: boolean;
  word_count: number;
  border_ghosts_removed?: number;
  duration_seconds: number;
}
|
||||
|
||||
/** A box rectangle; `StructureResult.boxes` is a list of these. */
export interface StructureBox {
  x: number;
  y: number;
  w: number;
  h: number;
  confidence: number;
  border_thickness: number;
  bg_color_name?: string;
  bg_color_hex?: string;
}
|
||||
|
||||
/** An indexed zone rectangle; `StructureResult.zones` is a list of these. */
export interface StructureZone {
  index: number;
  zone_type: 'content' | 'box';
  x: number;
  y: number;
  w: number;
  h: number;
}
|
||||
|
||||
/** Plain x / y / w / h rectangle, used by `GridCell` and `WordEntry` bounding boxes. */
export interface WordBbox {
  x: number;
  y: number;
  w: number;
  h: number;
}
|
||||
|
||||
/** Per-word OCR box as listed in `GridCell.word_boxes`. */
export interface OcrWordBox {
  text: string;
  /** absolute image x in px */
  left: number;
  /** absolute image y in px */
  top: number;
  /** px */
  width: number;
  /** px */
  height: number;
  conf: number;
  /** hex color of detected text, e.g. '#dc2626' */
  color?: string;
  /** 'black' | 'red' | 'blue' | 'green' | 'orange' | 'purple' | 'yellow' */
  color_name?: string;
  /** true if this word was recovered via color detection */
  recovered?: boolean;
}
|
||||
|
||||
/** One grid cell; `GridResult.cells` is a list of these. */
export interface GridCell {
  /** "R03_C1" */
  cell_id: string;
  row_index: number;
  col_index: number;
  col_type: string;
  text: string;
  confidence: number;
  bbox_px: WordBbox;
  bbox_pct: WordBbox;
  ocr_engine?: string;
  is_bold?: boolean;
  status?:
    | 'pending'
    | 'confirmed'
    | 'edited'
    | 'skipped';
  /** per-word bounding boxes from OCR engine */
  word_boxes?: Array<OcrWordBox>;
}
|
||||
|
||||
/** Column descriptor as listed in `GridResult.columns_used`. */
export interface ColumnMeta {
  index: number;
  type: string;
  x: number;
  width: number;
}
|
||||
|
||||
/** Grid payload: cells, grid shape, columns used, summary counters and optional LLM review data. */
export interface GridResult {
  cells: Array<GridCell>;
  grid_shape: {
    rows: number;
    cols: number;
    total_cells: number;
  };
  columns_used: Array<ColumnMeta>;
  layout: 'vocab' | 'generic';
  image_width: number;
  image_height: number;
  duration_seconds: number;
  ocr_engine?: string;
  /** Only when layout='vocab' */
  vocab_entries?: Array<WordEntry>;
  /** Backwards compat alias for vocab_entries */
  entries?: Array<WordEntry>;
  entry_count?: number;
  summary: {
    total_cells: number;
    non_empty_cells: number;
    low_confidence: number;
    // Only when layout='vocab':
    total_entries?: number;
    with_english?: number;
    with_german?: number;
  };
  llm_review?: {
    changes: Array<{
      row_index: number;
      field: string;
      old: string;
      new: string;
    }>;
    model_used: string;
    duration_ms: number;
    entries_corrected: number;
    applied_count?: number;
    applied_at?: string;
  };
}
|
||||
|
||||
/** One vocabulary row: english / german / example texts plus their bounding boxes. */
export interface WordEntry {
  row_index: number;
  english: string;
  german: string;
  example: string;
  source_page?: string;
  marker?: string;
  confidence: number;

  // Bounding boxes: one for the row, then one per field (nullable)
  bbox: WordBbox;
  bbox_en: WordBbox | null;
  bbox_de: WordBbox | null;
  bbox_ex: WordBbox | null;
  bbox_ref?: WordBbox | null;
  bbox_marker?: WordBbox | null;

  status?:
    | 'pending'
    | 'confirmed'
    | 'edited'
    | 'skipped';
}
|
||||
|
||||
/**
 * Entry-list result shape with its own summary counters.
 *
 * @deprecated Use GridResult instead
 */
export interface WordResult {
  entries: Array<WordEntry>;
  entry_count: number;
  image_width: number;
  image_height: number;
  duration_seconds: number;
  ocr_engine?: string;
  summary: {
    total_entries: number;
    with_english: number;
    with_german: number;
    low_confidence: number;
  };
}
|
||||
|
||||
/** Reviewer verdict for word entries, with optional corrected entries and notes. */
export interface WordGroundTruth {
  is_correct: boolean;
  corrected_entries?: Array<WordEntry>;
  notes?: string;
}
|
||||
|
||||
/** An image region: bounding box, prompt / description texts, optional base64 image and a style. */
export interface ImageRegion {
  bbox_pct: {
    x: number;
    y: number;
    w: number;
    h: number;
  };
  prompt: string;
  description: string;
  image_b64: string | null;
  style:
    | 'educational'
    | 'cartoon'
    | 'sketch'
    | 'clipart'
    | 'realistic';
}
|
||||
|
||||
/** Alias for the union of literal values accepted by `ImageRegion.style`. */
export type ImageStyle = ImageRegion['style'];
|
||||
|
||||
/** Value / label pairs for every `ImageStyle`, in display order. */
export const IMAGE_STYLES: { value: ImageStyle; label: string }[] = (
  [
    ['educational', 'Lehrbuch'],
    ['cartoon', 'Cartoon'],
    ['sketch', 'Skizze'],
    ['clipart', 'Clipart'],
    ['realistic', 'Realistisch'],
  ] as [ImageStyle, string][]
).map(([value, label]) => ({ value, label }))
|
||||
|
||||
/** The pipeline steps in order; every step starts with status 'pending'. */
export const PIPELINE_STEPS: PipelineStep[] = (
  [
    ['orientation', 'Orientierung', '🔄'],
    ['deskew', 'Begradigung', '📐'],
    ['dewarp', 'Entzerrung', '🔧'],
    ['crop', 'Zuschneiden', '✂️'],
    ['columns', 'Spalten', '📊'],
    ['rows', 'Zeilen', '📏'],
    ['words', 'Woerter', '🔤'],
    ['structure', 'Struktur', '🔍'],
    ['llm-review', 'Korrektur', '✏️'],
    ['reconstruction', 'Rekonstruktion', '🏗️'],
    ['ground-truth', 'Validierung', '✅'],
  ] as [PipelineStep['id'], PipelineStep['name'], PipelineStep['icon']][]
).map(([id, name, icon]): PipelineStep => ({ id, name, icon, status: 'pending' }))
|
||||
675
admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserQA.tsx
Normal file
675
admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserQA.tsx
Normal file
@@ -0,0 +1,675 @@
|
||||
'use client'
|
||||
|
||||
import React, { useState, useEffect, useCallback, useRef } from 'react'
|
||||
import { RAG_PDF_MAPPING } from './rag-pdf-mapping'
|
||||
import { REGULATIONS_IN_RAG, REGULATION_INFO } from '../rag-constants'
|
||||
|
||||
/** Props of the ChunkBrowserQA component. */
interface ChunkBrowserQAProps {
  /** URL the component fetches from; query parameters are appended after a `?`. */
  apiProxy: string;
}
|
||||
|
||||
/** Keys of the sidebar groups that regulations are sorted into. */
type RegGroupKey =
  | 'eu_regulation'
  | 'eu_directive'
  | 'de_law'
  | 'at_law'
  | 'ch_law'
  | 'national_law'
  | 'bsi_standard'
  | 'eu_guideline'
  | 'international_standard'
  | 'other'
|
||||
|
||||
/** Heading text shown for each sidebar group. */
const GROUP_LABELS: Record<RegGroupKey, string> = {
  // EU-level law
  eu_regulation: 'EU Verordnungen',
  eu_directive: 'EU Richtlinien',

  // National law
  de_law: 'DE Gesetze',
  at_law: 'AT Gesetze',
  ch_law: 'CH Gesetze',
  national_law: 'Nationale Gesetze (EU)',

  // Standards and guidelines
  bsi_standard: 'BSI Standards',
  eu_guideline: 'EDPB / Guidelines',
  international_standard: 'Internationale Standards',

  // Fallback group
  other: 'Sonstige',
}
|
||||
|
||||
/** Order in which the sidebar renders its groups. */
const GROUP_ORDER: RegGroupKey[] = [
  'eu_regulation',
  'eu_directive',
  'de_law',
  'at_law',
  'ch_law',
  'national_law',
  'bsi_standard',
  'eu_guideline',
  'international_standard',
  'other',
]
|
||||
|
||||
/** Collection names offered in the collection dropdown, in display order. */
const COLLECTIONS = [
  'bp_compliance_gesetze', 'bp_compliance_ce', 'bp_compliance_datenschutz',
  'bp_dsfa_corpus', 'bp_compliance_recht', 'bp_legal_templates',
  'bp_nibis_eh',
]
|
||||
|
||||
export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {
|
||||
// Filter-Sidebar
|
||||
const [selectedRegulation, setSelectedRegulation] = useState<string | null>(null)
|
||||
const [regulationCounts, setRegulationCounts] = useState<Record<string, number>>({})
|
||||
const [filterSearch, setFilterSearch] = useState('')
|
||||
const [countsLoading, setCountsLoading] = useState(false)
|
||||
|
||||
// Dokument-Chunks (sequenziell)
|
||||
const [docChunks, setDocChunks] = useState<Record<string, unknown>[]>([])
|
||||
const [docChunkIndex, setDocChunkIndex] = useState(0)
|
||||
const [docTotalChunks, setDocTotalChunks] = useState(0)
|
||||
const [docLoading, setDocLoading] = useState(false)
|
||||
const docChunksRef = useRef(docChunks)
|
||||
docChunksRef.current = docChunks
|
||||
|
||||
// Split-View
|
||||
const [splitViewActive, setSplitViewActive] = useState(true)
|
||||
const [chunksPerPage, setChunksPerPage] = useState(6)
|
||||
const [fullscreen, setFullscreen] = useState(false)
|
||||
|
||||
// Collection — default to bp_compliance_ce where we have PDFs downloaded
|
||||
const [collection, setCollection] = useState('bp_compliance_ce')
|
||||
|
||||
// PDF existence check
|
||||
const [pdfExists, setPdfExists] = useState<boolean | null>(null)
|
||||
|
||||
// Sidebar collapsed groups
|
||||
const [collapsedGroups, setCollapsedGroups] = useState<Set<string>>(new Set())
|
||||
|
||||
// Build grouped regulations for sidebar
|
||||
const regulationsInCollection = Object.entries(REGULATIONS_IN_RAG)
|
||||
.filter(([, info]) => info.collection === collection)
|
||||
.map(([code]) => code)
|
||||
|
||||
// Sort the regulations of the current collection into sidebar groups.
// A regulation whose type is missing or is not a known group key lands in 'other'.
const groupedRegulations = React.useMemo(() => {
  const buckets: Record<RegGroupKey, { code: string; name: string; type: string }[]> = {
    eu_regulation: [],
    eu_directive: [],
    de_law: [],
    at_law: [],
    ch_law: [],
    national_law: [],
    bsi_standard: [],
    eu_guideline: [],
    international_standard: [],
    other: [],
  }
  regulationsInCollection.forEach(code => {
    const info = REGULATION_INFO.find(entry => entry.code === code)
    const declared = (info?.type || 'other') as RegGroupKey
    const bucketKey = declared in buckets ? declared : 'other'
    buckets[bucketKey].push({
      code,
      name: info?.name || code,
      type: info?.type || 'unknown',
    })
  })
  return buckets
  // The joined string keeps the memo stable although the code array is rebuilt on every render.
}, [regulationsInCollection.join(',')])
|
||||
|
||||
// Load regulation counts for current collection
|
||||
// Fetch chunk counts for every regulation of `col` in one batch request and
// merge them into `regulationCounts`, keyed by our own regulation codes.
const loadRegulationCounts = useCallback(async (col: string) => {
  // qdrant_id -> every local regulation code that shares this id
  const codesByQdrantId: Record<string, string[]> = {}
  for (const [code, info] of Object.entries(REGULATIONS_IN_RAG)) {
    if (info.collection !== col || !info.qdrant_id) continue
    if (!codesByQdrantId[info.qdrant_id]) codesByQdrantId[info.qdrant_id] = []
    codesByQdrantId[info.qdrant_id].push(code)
  }
  const uniqueQdrantIds = Object.keys(codesByQdrantId)
  // Nothing in this collection has a qdrant_id: no request, no loading flag.
  if (uniqueQdrantIds.length === 0) return

  setCountsLoading(true)
  try {
    const query = new URLSearchParams({
      action: 'regulation-counts-batch',
      collection: col,
      qdrant_ids: uniqueQdrantIds.join(','),
    })
    const response = await fetch(`${apiProxy}?${query}`)
    // A non-OK response is ignored; `finally` still clears the loading flag.
    if (!response.ok) return

    const payload = await response.json()
    const countsByCode: Record<string, number> = {}
    for (const [qid, count] of Object.entries(payload.counts as Record<string, number>)) {
      // One qdrant_id may back several local codes; each gets the same count.
      for (const code of codesByQdrantId[qid] || []) {
        countsByCode[code] = count
      }
    }
    setRegulationCounts(prev => ({ ...prev, ...countsByCode }))
  } catch (error) {
    console.error('Failed to load regulation counts:', error)
  } finally {
    setCountsLoading(false)
  }
}, [apiProxy])
|
||||
|
||||
// Load all chunks for a regulation (paginated scroll)
|
||||
// Load every chunk of one regulation by paging through the `scroll` action
// (100 chunks per request), sort the result and publish it to component state.
//
// regulationCode: key into REGULATIONS_IN_RAG. A code that is unknown or has no
// qdrant_id results in an empty chunk list and no request.
const loadDocumentChunks = useCallback(async (regulationCode: string) => {
  // Clear the previously shown document first, before the bail-out below.
  // Otherwise selecting a regulation that cannot be loaded would keep the old
  // document's chunks on screen under the new regulation's heading.
  setDocChunks([])
  setDocChunkIndex(0)
  setDocTotalChunks(0)

  const ragInfo = REGULATIONS_IN_RAG[regulationCode]
  if (!ragInfo || !ragInfo.qdrant_id) return

  setDocLoading(true)

  const allChunks: Record<string, unknown>[] = []
  let offset: string | null = null

  try {
    // Hard cap on the number of pages so a misbehaving `next_offset` cannot loop forever.
    let safety = 0
    do {
      const params = new URLSearchParams({
        action: 'scroll',
        collection: ragInfo.collection,
        limit: '100',
        filter_key: 'regulation_id',
        filter_value: ragInfo.qdrant_id,
      })
      if (offset) params.append('offset', offset)

      const res = await fetch(`${apiProxy}?${params}`)
      if (!res.ok) {
        // Keep whatever was loaded so far, but report the truncation.
        console.error(`Failed to load document chunks: HTTP ${res.status}`)
        break
      }

      const data = await res.json()
      const chunks = Array.isArray(data.chunks) ? data.chunks : []
      allChunks.push(...chunks)
      offset = data.next_offset || null
      safety++
    } while (offset && safety < 200)

    // Sort by chunk_index, falling back to chunk_id. A value that is not a
    // finite number counts as 0 so the comparator never returns NaN.
    const orderKey = (chunk: Record<string, unknown>): number => {
      const n = Number(chunk.chunk_index ?? chunk.chunk_id ?? 0)
      return Number.isFinite(n) ? n : 0
    }
    allChunks.sort((a, b) => orderKey(a) - orderKey(b))

    setDocChunks(allChunks)
    setDocTotalChunks(allChunks.length)
    setDocChunkIndex(0)
  } catch (error) {
    console.error('Failed to load document chunks:', error)
  } finally {
    setDocLoading(false)
  }
}, [apiProxy])
|
||||
|
||||
// Initial load
|
||||
useEffect(() => {
|
||||
loadRegulationCounts(collection)
|
||||
}, [collection, loadRegulationCounts])
|
||||
|
||||
// Current chunk
|
||||
const currentChunk = docChunks[docChunkIndex] || null
|
||||
const prevChunk = docChunkIndex > 0 ? docChunks[docChunkIndex - 1] : null
|
||||
const nextChunk = docChunkIndex < docChunks.length - 1 ? docChunks[docChunkIndex + 1] : null
|
||||
|
||||
// PDF page estimation — use pages metadata if available
|
||||
// Pick the PDF page for a chunk. Precedence: first entry of the chunk's `pages`
// array, then a positive numeric `page` field, and finally an estimate derived
// from the chunk position and the chunks-per-page setting.
const estimatePdfPage = (chunk: Record<string, unknown> | null, chunkIdx: number): number => {
  // e.g. [7] or [7,8]
  const pageList = chunk?.pages as number[] | undefined
  if (Array.isArray(pageList) && pageList.length > 0) return pageList[0]

  const singlePage = chunk?.page as number | undefined
  if (typeof singlePage === 'number' && singlePage > 0) return singlePage

  // A per-document value from the PDF mapping wins over the user-selected default.
  const docMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null
  const perPage = docMapping?.chunksPerPage || chunksPerPage
  return Math.floor(chunkIdx / perPage) + 1
}
|
||||
|
||||
const pdfPage = estimatePdfPage(currentChunk, docChunkIndex)
|
||||
const pdfMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null
|
||||
const pdfUrl = pdfMapping ? `/rag-originals/${pdfMapping.filename}#page=${pdfPage}` : null
|
||||
|
||||
// Check PDF existence when regulation changes
|
||||
useEffect(() => {
|
||||
if (!selectedRegulation) { setPdfExists(null); return }
|
||||
const mapping = RAG_PDF_MAPPING[selectedRegulation]
|
||||
if (!mapping) { setPdfExists(false); return }
|
||||
const url = `/rag-originals/${mapping.filename}`
|
||||
fetch(url, { method: 'HEAD' })
|
||||
.then(res => setPdfExists(res.ok))
|
||||
.catch(() => setPdfExists(false))
|
||||
}, [selectedRegulation])
|
||||
|
||||
// Handlers
|
||||
const handleSelectRegulation = (code: string) => {
|
||||
setSelectedRegulation(code)
|
||||
loadDocumentChunks(code)
|
||||
}
|
||||
|
||||
const handleCollectionChange = (col: string) => {
|
||||
setCollection(col)
|
||||
setSelectedRegulation(null)
|
||||
setDocChunks([])
|
||||
setDocChunkIndex(0)
|
||||
setDocTotalChunks(0)
|
||||
setRegulationCounts({})
|
||||
}
|
||||
|
||||
const handlePrev = () => {
|
||||
if (docChunkIndex > 0) setDocChunkIndex(i => i - 1)
|
||||
}
|
||||
|
||||
const handleNext = () => {
|
||||
if (docChunkIndex < docChunks.length - 1) setDocChunkIndex(i => i + 1)
|
||||
}
|
||||
|
||||
// Window-level keyboard shortcuts:
//   Escape                 leave fullscreen (only while fullscreen is active)
//   ArrowLeft / ArrowUp    previous chunk
//   ArrowRight / ArrowDown next chunk
const handleKeyDown = useCallback((e: KeyboardEvent) => {
  if (e.key === 'Escape' && fullscreen) {
    e.preventDefault()
    setFullscreen(false)
    return
  }

  // While focus is in a form control, arrow keys belong to that control
  // (caret movement, number stepping, option selection). Do not steal them.
  const target = e.target
  if (
    target instanceof HTMLElement &&
    (target.isContentEditable || ['INPUT', 'TEXTAREA', 'SELECT'].includes(target.tagName))
  ) {
    return
  }

  if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') {
    e.preventDefault()
    setDocChunkIndex(i => Math.max(0, i - 1))
  } else if (e.key === 'ArrowRight' || e.key === 'ArrowDown') {
    e.preventDefault()
    // The ref gives the current chunk count without re-creating this callback.
    // Clamp at 0 so an empty chunk list cannot produce an index of -1.
    setDocChunkIndex(i => Math.max(0, Math.min(docChunksRef.current.length - 1, i + 1)))
  }
}, [fullscreen])
|
||||
|
||||
useEffect(() => {
|
||||
if (fullscreen || (selectedRegulation && docChunks.length > 0)) {
|
||||
window.addEventListener('keydown', handleKeyDown)
|
||||
return () => window.removeEventListener('keydown', handleKeyDown)
|
||||
}
|
||||
}, [selectedRegulation, docChunks.length, handleKeyDown, fullscreen])
|
||||
|
||||
// Flip one sidebar group between collapsed and expanded.
const toggleGroup = (group: string) => {
  setCollapsedGroups(current => {
    // Copy first: React state must not be mutated in place.
    const updated = new Set(current)
    // delete() reports whether the entry existed; add it only when it did not.
    if (!updated.delete(group)) updated.add(group)
    return updated
  })
}
|
||||
|
||||
// Get text content from a chunk
|
||||
// Return a chunk's text as a string: the first truthy value among `chunk_text`,
// `text` and `content`, or '' when there is no chunk or none of them is set.
const getChunkText = (chunk: Record<string, unknown> | null): string => {
  const raw = chunk ? chunk.chunk_text || chunk.text || chunk.content || '' : ''
  return String(raw)
}
|
||||
|
||||
// Extract structural metadata for prominent display
|
||||
// Collect the structural metadata shown as badges: an article-like label, a
// section-like label and a page label ("S. 7" or "S. 7-8"). A key is only set
// when the chunk carries a truthy value for it.
const getStructuralInfo = (chunk: Record<string, unknown> | null): { article?: string; section?: string; pages?: string } => {
  const info: { article?: string; section?: string; pages?: string } = {}
  if (!chunk) return info

  // Article / paragraph: the first truthy field wins
  const articleLike = chunk.article || chunk.artikel || chunk.paragraph || chunk.section_title
  if (articleLike) info.article = String(articleLike)

  // Section / chapter: the first truthy field wins
  const sectionLike = chunk.section || chunk.chapter || chunk.abschnitt || chunk.kapitel
  if (sectionLike) info.section = String(sectionLike)

  // Pages: a non-empty `pages` array takes precedence over a single `page`
  const pageList = chunk.pages as number[] | undefined
  if (Array.isArray(pageList) && pageList.length > 0) {
    const first = pageList[0]
    const last = pageList[pageList.length - 1]
    info.pages = pageList.length === 1 ? `S. ${first}` : `S. ${first}-${last}`
  } else if (chunk.page) {
    info.pages = `S. ${chunk.page}`
  }

  return info
}
|
||||
|
||||
// Overlap extraction
|
||||
// Tail of the previous chunk: its last 150 characters prefixed with '...',
// or the whole text when it is no longer than 150 characters.
const getOverlapPrev = (): string => {
  const text = prevChunk ? getChunkText(prevChunk) : ''
  if (text.length <= 150) return text
  return '...' + text.slice(-150)
}
|
||||
|
||||
// Head of the next chunk: its first 150 characters followed by '...',
// or the whole text when it is no longer than 150 characters.
const getOverlapNext = (): string => {
  const text = nextChunk ? getChunkText(nextChunk) : ''
  if (text.length <= 150) return text
  return text.slice(0, 150) + '...'
}
|
||||
|
||||
// Filter sidebar items
|
||||
// Apply the sidebar search box to the grouped regulations. A blank search
// returns the groups unchanged; otherwise an entry is kept when its code or
// name contains the lower-cased search text.
const filteredRegulations = React.useMemo(() => {
  if (!filterSearch.trim()) return groupedRegulations

  const needle = filterSearch.toLowerCase()
  const matches = (value: string) => value.toLowerCase().includes(needle)

  const result: typeof groupedRegulations = {
    eu_regulation: [],
    eu_directive: [],
    de_law: [],
    at_law: [],
    ch_law: [],
    national_law: [],
    bsi_standard: [],
    eu_guideline: [],
    international_standard: [],
    other: [],
  }
  for (const key of Object.keys(groupedRegulations) as RegGroupKey[]) {
    result[key] = groupedRegulations[key].filter(reg => matches(reg.code) || matches(reg.name))
  }
  return result
}, [groupedRegulations, filterSearch])
|
||||
|
||||
// Regulation name lookup
|
||||
// Display name for a regulation code; falls back to the code itself when the
// regulation is unknown or has no name.
const getRegName = (code: string): string =>
  REGULATION_INFO.find(entry => entry.code === code)?.name || code
|
||||
|
||||
// Important metadata keys to show prominently
|
||||
const STRUCTURAL_KEYS = new Set([
|
||||
'article', 'artikel', 'paragraph', 'section_title', 'section', 'chapter',
|
||||
'abschnitt', 'kapitel', 'pages', 'page',
|
||||
])
|
||||
const HIDDEN_KEYS = new Set([
|
||||
'text', 'content', 'chunk_text', 'id', 'embedding',
|
||||
])
|
||||
|
||||
const structInfo = getStructuralInfo(currentChunk)
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`flex flex-col ${fullscreen ? 'fixed inset-0 z-50 bg-slate-100 p-4' : ''}`}
|
||||
style={fullscreen ? { height: '100vh' } : { height: 'calc(100vh - 220px)' }}
|
||||
>
|
||||
{/* Header bar — fixed height */}
|
||||
<div className="flex-shrink-0 bg-white rounded-xl border border-slate-200 p-3 mb-3">
|
||||
<div className="flex flex-wrap items-center gap-4">
|
||||
<div>
|
||||
<label className="block text-xs font-medium text-slate-500 mb-1">Collection</label>
|
||||
<select
|
||||
value={collection}
|
||||
onChange={(e) => handleCollectionChange(e.target.value)}
|
||||
className="px-3 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
|
||||
>
|
||||
{COLLECTIONS.map(c => (
|
||||
<option key={c} value={c}>{c}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
{selectedRegulation && (
|
||||
<>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-semibold text-slate-900">
|
||||
{selectedRegulation} — {getRegName(selectedRegulation)}
|
||||
</span>
|
||||
{structInfo.article && (
|
||||
<span className="px-2 py-0.5 bg-blue-100 text-blue-800 text-xs font-medium rounded">
|
||||
{structInfo.article}
|
||||
</span>
|
||||
)}
|
||||
{structInfo.pages && (
|
||||
<span className="px-2 py-0.5 bg-slate-100 text-slate-600 text-xs rounded">
|
||||
{structInfo.pages}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center gap-2 ml-auto">
|
||||
<button
|
||||
onClick={handlePrev}
|
||||
disabled={docChunkIndex === 0}
|
||||
className="px-3 py-1.5 text-sm font-medium border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
>
|
||||
◀ Zurueck
|
||||
</button>
|
||||
<span className="text-sm font-mono text-slate-600 min-w-[80px] text-center">
|
||||
{docChunkIndex + 1} / {docTotalChunks}
|
||||
</span>
|
||||
<button
|
||||
onClick={handleNext}
|
||||
disabled={docChunkIndex >= docChunks.length - 1}
|
||||
className="px-3 py-1.5 text-sm font-medium border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
>
|
||||
Weiter ▶
|
||||
</button>
|
||||
<input
|
||||
type="number"
|
||||
min={1}
|
||||
max={docTotalChunks}
|
||||
value={docChunkIndex + 1}
|
||||
onChange={(e) => {
|
||||
const v = parseInt(e.target.value, 10)
|
||||
if (!isNaN(v) && v >= 1 && v <= docTotalChunks) setDocChunkIndex(v - 1)
|
||||
}}
|
||||
className="w-16 px-2 py-1 border rounded text-xs text-center"
|
||||
title="Springe zu Chunk Nr."
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<label className="text-xs text-slate-500">Chunks/Seite:</label>
|
||||
<select
|
||||
value={chunksPerPage}
|
||||
onChange={(e) => setChunksPerPage(Number(e.target.value))}
|
||||
className="px-2 py-1 border rounded text-xs"
|
||||
>
|
||||
{[3, 4, 5, 6, 8, 10, 12, 15, 20].map(n => (
|
||||
<option key={n} value={n}>{n}</option>
|
||||
))}
|
||||
</select>
|
||||
<button
|
||||
onClick={() => setSplitViewActive(!splitViewActive)}
|
||||
className={`px-3 py-1 text-xs rounded-lg border ${
|
||||
splitViewActive ? 'bg-teal-50 border-teal-300 text-teal-700' : 'bg-slate-50 border-slate-300 text-slate-600'
|
||||
}`}
|
||||
>
|
||||
{splitViewActive ? 'Split-View an' : 'Split-View aus'}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setFullscreen(!fullscreen)}
|
||||
className={`px-3 py-1 text-xs rounded-lg border ${
|
||||
fullscreen ? 'bg-indigo-50 border-indigo-300 text-indigo-700' : 'bg-slate-50 border-slate-300 text-slate-600'
|
||||
}`}
|
||||
title={fullscreen ? 'Vollbild beenden (Esc)' : 'Vollbild'}
|
||||
>
|
||||
{fullscreen ? '✕ Vollbild beenden' : '⛶ Vollbild'}
|
||||
</button>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Main content: Sidebar + Content — fills remaining height */}
|
||||
<div className="flex gap-3 flex-1 min-h-0">
|
||||
{/* Sidebar — scrollable */}
|
||||
<div className="w-56 flex-shrink-0 bg-white rounded-xl border border-slate-200 flex flex-col min-h-0">
|
||||
<div className="flex-shrink-0 p-3 border-b border-slate-100">
|
||||
<input
|
||||
type="text"
|
||||
value={filterSearch}
|
||||
onChange={(e) => setFilterSearch(e.target.value)}
|
||||
placeholder="Suche..."
|
||||
className="w-full px-2 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
|
||||
/>
|
||||
{countsLoading && (
|
||||
<div className="text-xs text-slate-400 mt-1 animate-pulse">Counts laden...</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex-1 overflow-y-auto min-h-0">
|
||||
{GROUP_ORDER.map(group => {
|
||||
const items = filteredRegulations[group]
|
||||
if (items.length === 0) return null
|
||||
const isCollapsed = collapsedGroups.has(group)
|
||||
return (
|
||||
<div key={group}>
|
||||
<button
|
||||
onClick={() => toggleGroup(group)}
|
||||
className="w-full px-3 py-1.5 text-left text-xs font-semibold text-slate-500 bg-slate-50 hover:bg-slate-100 flex items-center justify-between sticky top-0 z-10"
|
||||
>
|
||||
<span>{GROUP_LABELS[group]}</span>
|
||||
<span className="text-slate-400">{isCollapsed ? '+' : '-'}</span>
|
||||
</button>
|
||||
{!isCollapsed && items.map(reg => {
|
||||
const count = regulationCounts[reg.code] ?? 0
|
||||
const isSelected = selectedRegulation === reg.code
|
||||
return (
|
||||
<button
|
||||
key={reg.code}
|
||||
onClick={() => handleSelectRegulation(reg.code)}
|
||||
className={`w-full px-3 py-1.5 text-left text-sm flex items-center justify-between hover:bg-teal-50 transition-colors ${
|
||||
isSelected ? 'bg-teal-100 text-teal-900 font-medium' : 'text-slate-700'
|
||||
}`}
|
||||
>
|
||||
<span className="truncate text-xs">{reg.name || reg.code}</span>
|
||||
<span className={`text-xs tabular-nums flex-shrink-0 ml-1 ${count > 0 ? 'text-slate-500' : 'text-slate-300'}`}>
|
||||
{count > 0 ? count.toLocaleString() : '—'}
|
||||
</span>
|
||||
</button>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Content area — fills remaining width and height */}
|
||||
{!selectedRegulation ? (
|
||||
<div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
|
||||
<div className="text-center text-slate-400 space-y-2">
|
||||
<div className="text-4xl">🔍</div>
|
||||
<p className="text-sm">Dokument in der Sidebar auswaehlen, um QA zu starten.</p>
|
||||
<p className="text-xs text-slate-300">Pfeiltasten: Chunk vor/zurueck</p>
|
||||
</div>
|
||||
</div>
|
||||
) : docLoading ? (
|
||||
<div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
|
||||
<div className="text-center text-slate-500 space-y-2">
|
||||
<div className="animate-spin text-3xl">⚙</div>
|
||||
<p className="text-sm">Chunks werden geladen...</p>
|
||||
<p className="text-xs text-slate-400">
|
||||
{selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks.toLocaleString() || '?'} Chunks erwartet
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className={`flex-1 grid gap-3 min-h-0 ${splitViewActive ? 'grid-cols-2' : 'grid-cols-1'}`}>
|
||||
{/* Chunk-Text Panel — fixed height, internal scroll */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 flex flex-col min-h-0 overflow-hidden">
|
||||
{/* Panel header */}
|
||||
<div className="flex-shrink-0 px-4 py-2 bg-slate-50 border-b border-slate-100 flex items-center justify-between">
|
||||
<span className="text-sm font-medium text-slate-700">Chunk-Text</span>
|
||||
<div className="flex items-center gap-2">
|
||||
{structInfo.article && (
|
||||
<span className="px-2 py-0.5 bg-blue-50 text-blue-700 text-xs font-medium rounded border border-blue-200">
|
||||
{structInfo.article}
|
||||
</span>
|
||||
)}
|
||||
{structInfo.section && (
|
||||
<span className="px-2 py-0.5 bg-purple-50 text-purple-700 text-xs rounded border border-purple-200">
|
||||
{structInfo.section}
|
||||
</span>
|
||||
)}
|
||||
<span className="text-xs text-slate-400 tabular-nums">
|
||||
#{docChunkIndex} / {docTotalChunks - 1}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Scrollable content */}
|
||||
<div className="flex-1 overflow-y-auto min-h-0 p-4 space-y-3">
|
||||
{/* Overlap from previous chunk */}
|
||||
{prevChunk && (
|
||||
<div className="text-xs text-slate-400 bg-amber-50 border-l-2 border-amber-300 px-3 py-2 rounded-r">
|
||||
<div className="font-medium text-amber-600 mb-1">↑ Ende vorheriger Chunk #{docChunkIndex - 1}</div>
|
||||
<p className="whitespace-pre-wrap break-words leading-relaxed">{getOverlapPrev()}</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Current chunk text */}
|
||||
{currentChunk ? (
|
||||
<div className="text-sm text-slate-800 whitespace-pre-wrap break-words leading-relaxed border-l-2 border-teal-400 pl-3">
|
||||
{getChunkText(currentChunk)}
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-sm text-slate-400 italic">Kein Chunk-Text vorhanden.</div>
|
||||
)}
|
||||
|
||||
{/* Overlap from next chunk */}
|
||||
{nextChunk && (
|
||||
<div className="text-xs text-slate-400 bg-amber-50 border-l-2 border-amber-300 px-3 py-2 rounded-r">
|
||||
<div className="font-medium text-amber-600 mb-1">↓ Anfang naechster Chunk #{docChunkIndex + 1}</div>
|
||||
<p className="whitespace-pre-wrap break-words leading-relaxed">{getOverlapNext()}</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Metadata */}
|
||||
{currentChunk && (
|
||||
<div className="mt-4 pt-3 border-t border-slate-100">
|
||||
<div className="text-xs font-medium text-slate-500 mb-2">Metadaten</div>
|
||||
<div className="grid grid-cols-2 gap-x-4 gap-y-1 text-xs">
|
||||
{Object.entries(currentChunk)
|
||||
.filter(([k]) => !HIDDEN_KEYS.has(k))
|
||||
.sort(([a], [b]) => {
|
||||
// Structural keys first
|
||||
const aStruct = STRUCTURAL_KEYS.has(a) ? 0 : 1
|
||||
const bStruct = STRUCTURAL_KEYS.has(b) ? 0 : 1
|
||||
return aStruct - bStruct || a.localeCompare(b)
|
||||
})
|
||||
.map(([k, v]) => (
|
||||
<div key={k} className={`flex gap-1 ${STRUCTURAL_KEYS.has(k) ? 'col-span-2 font-medium' : ''}`}>
|
||||
<span className="font-medium text-slate-500 flex-shrink-0">{k}:</span>
|
||||
<span className="text-slate-700 break-all">
|
||||
{Array.isArray(v) ? v.join(', ') : String(v)}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
{/* Chunk quality indicator */}
|
||||
<div className="mt-3 pt-2 border-t border-slate-50">
|
||||
<div className="text-xs text-slate-400">
|
||||
Chunk-Laenge: {getChunkText(currentChunk).length} Zeichen
|
||||
{getChunkText(currentChunk).length < 50 && (
|
||||
<span className="ml-2 text-orange-500 font-medium">⚠ Sehr kurz</span>
|
||||
)}
|
||||
{getChunkText(currentChunk).length > 2000 && (
|
||||
<span className="ml-2 text-orange-500 font-medium">⚠ Sehr lang</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* PDF-Viewer Panel */}
|
||||
{splitViewActive && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 flex flex-col min-h-0 overflow-hidden">
|
||||
<div className="flex-shrink-0 px-4 py-2 bg-slate-50 border-b border-slate-100 flex items-center justify-between">
|
||||
<span className="text-sm font-medium text-slate-700">Original-PDF</span>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-xs text-slate-400">
|
||||
Seite ~{pdfPage}
|
||||
{pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''}
|
||||
</span>
|
||||
{pdfUrl && (
|
||||
<a
|
||||
href={pdfUrl.split('#')[0]}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-xs text-teal-600 hover:text-teal-800 underline"
|
||||
>
|
||||
Oeffnen ↗
|
||||
</a>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex-1 min-h-0 relative">
|
||||
{pdfUrl && pdfExists ? (
|
||||
<iframe
|
||||
key={`${selectedRegulation}-${pdfPage}`}
|
||||
src={pdfUrl}
|
||||
className="absolute inset-0 w-full h-full border-0"
|
||||
title="Original PDF"
|
||||
/>
|
||||
) : (
|
||||
<div className="flex items-center justify-center h-full text-slate-400 text-sm p-4">
|
||||
<div className="text-center space-y-2">
|
||||
<div className="text-3xl">📄</div>
|
||||
{!pdfMapping ? (
|
||||
<>
|
||||
<p>Kein PDF-Mapping fuer {selectedRegulation}.</p>
|
||||
<p className="text-xs">rag-pdf-mapping.ts ergaenzen.</p>
|
||||
</>
|
||||
) : pdfExists === false ? (
|
||||
<>
|
||||
<p className="font-medium text-orange-600">PDF nicht vorhanden</p>
|
||||
<p className="text-xs">Datei <code className="bg-slate-100 px-1 rounded">{pdfMapping.filename}</code> fehlt in ~/rag-originals/</p>
|
||||
<p className="text-xs mt-1">Bitte manuell herunterladen und dort ablegen.</p>
|
||||
</>
|
||||
) : (
|
||||
<p>PDF wird geprueft...</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
126
admin-lehrer/app/(admin)/ai/rag/components/rag-pdf-mapping.ts
Normal file
126
admin-lehrer/app/(admin)/ai/rag/components/rag-pdf-mapping.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
/**
 * Describes the original PDF document that belongs to one RAG regulation entry.
 * Instances are the values of the RAG_PDF_MAPPING table declared in this file.
 */
export interface RagPdfMapping {
|
||||
// File name of the PDF (the mapping table uses names such as 'GDPR_DE.pdf').
filename: string
|
||||
// Optional page count of the PDF; only some table entries provide it.
totalPages?: number
|
||||
// Optional chunks-per-page figure.
// NOTE(review): presumably used to estimate the PDF page for a chunk index — confirm against the consumer.
chunksPerPage?: number
|
||||
// Language code of the PDF; the mapping table uses short codes such as 'de', 'en', 'fr'.
language: string
|
||||
}
|
||||
|
||||
/**
 * Lookup table from a regulation code to the metadata of its original PDF.
 * Only GDPR and AIACT currently carry a totalPages value; no entry sets chunksPerPage.
 * NOTE(review): keys look like the same regulation codes used by the RAG page tables — confirm.
 */
export const RAG_PDF_MAPPING: Record<string, RagPdfMapping> = {
|
||||
// EU regulations
|
||||
GDPR: { filename: 'GDPR_DE.pdf', language: 'de', totalPages: 88 },
|
||||
EPRIVACY: { filename: 'EPRIVACY_DE.pdf', language: 'de' },
|
||||
SCC: { filename: 'SCC_DE.pdf', language: 'de' },
|
||||
SCC_FULL_TEXT: { filename: 'SCC_FULL_TEXT_DE.pdf', language: 'de' },
|
||||
AIACT: { filename: 'AIACT_DE.pdf', language: 'de', totalPages: 144 },
|
||||
CRA: { filename: 'CRA_DE.pdf', language: 'de' },
|
||||
NIS2: { filename: 'NIS2_DE.pdf', language: 'de' },
|
||||
DGA: { filename: 'DGA_DE.pdf', language: 'de' },
|
||||
DSA: { filename: 'DSA_DE.pdf', language: 'de' },
|
||||
PLD: { filename: 'PLD_DE.pdf', language: 'de' },
|
||||
E_COMMERCE_RL: { filename: 'E_COMMERCE_RL_DE.pdf', language: 'de' },
|
||||
VERBRAUCHERRECHTE_RL: { filename: 'VERBRAUCHERRECHTE_RL_DE.pdf', language: 'de' },
|
||||
DIGITALE_INHALTE_RL: { filename: 'DIGITALE_INHALTE_RL_DE.pdf', language: 'de' },
|
||||
DMA: { filename: 'DMA_DE.pdf', language: 'de' },
|
||||
DPF: { filename: 'DPF_DE.pdf', language: 'de' },
|
||||
EUCSA: { filename: 'EUCSA_DE.pdf', language: 'de' },
|
||||
DATAACT: { filename: 'DATAACT_DE.pdf', language: 'de' },
|
||||
DORA: { filename: 'DORA_DE.pdf', language: 'de' },
|
||||
PSD2: { filename: 'PSD2_DE.pdf', language: 'de' },
|
||||
AMLR: { filename: 'AMLR_DE.pdf', language: 'de' },
|
||||
MiCA: { filename: 'MiCA_DE.pdf', language: 'de' },
|
||||
EHDS: { filename: 'EHDS_DE.pdf', language: 'de' },
|
||||
EAA: { filename: 'EAA_DE.pdf', language: 'de' },
|
||||
DSM: { filename: 'DSM_DE.pdf', language: 'de' },
|
||||
GPSR: { filename: 'GPSR_DE.pdf', language: 'de' },
|
||||
MACHINERY_REG: { filename: 'MACHINERY_REG_DE.pdf', language: 'de' },
|
||||
BLUE_GUIDE: { filename: 'BLUE_GUIDE_DE.pdf', language: 'de' },
|
||||
// German laws
|
||||
TDDDG: { filename: 'TDDDG_DE.pdf', language: 'de' },
|
||||
BDSG_FULL: { filename: 'BDSG_FULL_DE.pdf', language: 'de' },
|
||||
DE_DDG: { filename: 'DE_DDG.pdf', language: 'de' },
|
||||
DE_BGB_AGB: { filename: 'DE_BGB_AGB.pdf', language: 'de' },
|
||||
DE_EGBGB: { filename: 'DE_EGBGB.pdf', language: 'de' },
|
||||
DE_HGB_RET: { filename: 'DE_HGB_RET.pdf', language: 'de' },
|
||||
DE_AO_RET: { filename: 'DE_AO_RET.pdf', language: 'de' },
|
||||
DE_UWG: { filename: 'DE_UWG.pdf', language: 'de' },
|
||||
DE_TKG: { filename: 'DE_TKG.pdf', language: 'de' },
|
||||
DE_PANGV: { filename: 'DE_PANGV.pdf', language: 'de' },
|
||||
DE_DLINFOV: { filename: 'DE_DLINFOV.pdf', language: 'de' },
|
||||
DE_BETRVG: { filename: 'DE_BETRVG.pdf', language: 'de' },
|
||||
DE_GESCHGEHG: { filename: 'DE_GESCHGEHG.pdf', language: 'de' },
|
||||
DE_BSIG: { filename: 'DE_BSIG.pdf', language: 'de' },
|
||||
DE_USTG_RET: { filename: 'DE_USTG_RET.pdf', language: 'de' },
|
||||
// BSI standards
|
||||
'BSI-TR-03161-1': { filename: 'BSI-TR-03161-1.pdf', language: 'de' },
|
||||
'BSI-TR-03161-2': { filename: 'BSI-TR-03161-2.pdf', language: 'de' },
|
||||
'BSI-TR-03161-3': { filename: 'BSI-TR-03161-3.pdf', language: 'de' },
|
||||
// Austrian laws
|
||||
AT_DSG: { filename: 'AT_DSG.pdf', language: 'de' },
|
||||
AT_DSG_FULL: { filename: 'AT_DSG_FULL.pdf', language: 'de' },
|
||||
AT_ECG: { filename: 'AT_ECG.pdf', language: 'de' },
|
||||
AT_TKG: { filename: 'AT_TKG.pdf', language: 'de' },
|
||||
AT_KSCHG: { filename: 'AT_KSCHG.pdf', language: 'de' },
|
||||
AT_FAGG: { filename: 'AT_FAGG.pdf', language: 'de' },
|
||||
AT_UGB_RET: { filename: 'AT_UGB_RET.pdf', language: 'de' },
|
||||
AT_BAO_RET: { filename: 'AT_BAO_RET.pdf', language: 'de' },
|
||||
AT_MEDIENG: { filename: 'AT_MEDIENG.pdf', language: 'de' },
|
||||
AT_ABGB_AGB: { filename: 'AT_ABGB_AGB.pdf', language: 'de' },
|
||||
AT_UWG: { filename: 'AT_UWG.pdf', language: 'de' },
|
||||
// Swiss laws
|
||||
CH_DSG: { filename: 'CH_DSG.pdf', language: 'de' },
|
||||
CH_DSV: { filename: 'CH_DSV.pdf', language: 'de' },
|
||||
CH_OR_AGB: { filename: 'CH_OR_AGB.pdf', language: 'de' },
|
||||
CH_UWG: { filename: 'CH_UWG.pdf', language: 'de' },
|
||||
CH_FMG: { filename: 'CH_FMG.pdf', language: 'de' },
|
||||
CH_GEBUV: { filename: 'CH_GEBUV.pdf', language: 'de' },
|
||||
CH_ZERTES: { filename: 'CH_ZERTES.pdf', language: 'de' },
|
||||
CH_ZGB_PERS: { filename: 'CH_ZGB_PERS.pdf', language: 'de' },
|
||||
// Liechtenstein
|
||||
LI_DSG: { filename: 'LI_DSG.pdf', language: 'de' },
|
||||
// National data protection laws (other European countries)
|
||||
ES_LOPDGDD: { filename: 'ES_LOPDGDD.pdf', language: 'es' },
|
||||
IT_CODICE_PRIVACY: { filename: 'IT_CODICE_PRIVACY.pdf', language: 'it' },
|
||||
NL_UAVG: { filename: 'NL_UAVG.pdf', language: 'nl' },
|
||||
FR_CNIL_GUIDE: { filename: 'FR_CNIL_GUIDE.pdf', language: 'fr' },
|
||||
IE_DPA_2018: { filename: 'IE_DPA_2018.pdf', language: 'en' },
|
||||
UK_DPA_2018: { filename: 'UK_DPA_2018.pdf', language: 'en' },
|
||||
UK_GDPR: { filename: 'UK_GDPR.pdf', language: 'en' },
|
||||
NO_PERSONOPPLYSNINGSLOVEN: { filename: 'NO_PERSONOPPLYSNINGSLOVEN.pdf', language: 'no' },
|
||||
SE_DATASKYDDSLAG: { filename: 'SE_DATASKYDDSLAG.pdf', language: 'sv' },
|
||||
PL_UODO: { filename: 'PL_UODO.pdf', language: 'pl' },
|
||||
CZ_ZOU: { filename: 'CZ_ZOU.pdf', language: 'cs' },
|
||||
HU_INFOTV: { filename: 'HU_INFOTV.pdf', language: 'hu' },
|
||||
BE_DPA_LAW: { filename: 'BE_DPA_LAW.pdf', language: 'nl' },
|
||||
FI_TIETOSUOJALAKI: { filename: 'FI_TIETOSUOJALAKI.pdf', language: 'fi' },
|
||||
DK_DATABESKYTTELSESLOVEN: { filename: 'DK_DATABESKYTTELSESLOVEN.pdf', language: 'da' },
|
||||
LU_DPA_LAW: { filename: 'LU_DPA_LAW.pdf', language: 'fr' },
|
||||
// German laws (additional)
|
||||
TMG_KOMPLETT: { filename: 'TMG_KOMPLETT.pdf', language: 'de' },
|
||||
DE_URHG: { filename: 'DE_URHG.pdf', language: 'de' },
|
||||
// EDPB guidelines
|
||||
EDPB_GUIDELINES_5_2020: { filename: 'EDPB_GUIDELINES_5_2020.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_7_2020: { filename: 'EDPB_GUIDELINES_7_2020.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_1_2020: { filename: 'EDPB_GUIDELINES_1_2020.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_1_2022: { filename: 'EDPB_GUIDELINES_1_2022.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_2_2023: { filename: 'EDPB_GUIDELINES_2_2023.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_2_2024: { filename: 'EDPB_GUIDELINES_2_2024.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_4_2019: { filename: 'EDPB_GUIDELINES_4_2019.pdf', language: 'en' },
|
||||
EDPB_GUIDELINES_9_2022: { filename: 'EDPB_GUIDELINES_9_2022.pdf', language: 'en' },
|
||||
EDPB_DPIA_LIST: { filename: 'EDPB_DPIA_LIST.pdf', language: 'en' },
|
||||
EDPB_LEGITIMATE_INTEREST: { filename: 'EDPB_LEGITIMATE_INTEREST.pdf', language: 'en' },
|
||||
// EDPS
|
||||
EDPS_DPIA_LIST: { filename: 'EDPS_DPIA_LIST.pdf', language: 'en' },
|
||||
// Frameworks
|
||||
ENISA_SECURE_BY_DESIGN: { filename: 'ENISA_SECURE_BY_DESIGN.pdf', language: 'en' },
|
||||
ENISA_SUPPLY_CHAIN: { filename: 'ENISA_SUPPLY_CHAIN.pdf', language: 'en' },
|
||||
ENISA_THREAT_LANDSCAPE: { filename: 'ENISA_THREAT_LANDSCAPE.pdf', language: 'en' },
|
||||
ENISA_ICS_SCADA: { filename: 'ENISA_ICS_SCADA.pdf', language: 'en' },
|
||||
ENISA_CYBERSECURITY_2024: { filename: 'ENISA_CYBERSECURITY_2024.pdf', language: 'en' },
|
||||
NIST_SSDF: { filename: 'NIST_SSDF.pdf', language: 'en' },
|
||||
NIST_CSF_2: { filename: 'NIST_CSF_2.pdf', language: 'en' },
|
||||
OECD_AI_PRINCIPLES: { filename: 'OECD_AI_PRINCIPLES.pdf', language: 'en' },
|
||||
// EU-IFRS / EFRAG
|
||||
EU_IFRS_DE: { filename: 'EU_IFRS_DE.pdf', language: 'de' },
|
||||
EU_IFRS_EN: { filename: 'EU_IFRS_EN.pdf', language: 'en' },
|
||||
EFRAG_ENDORSEMENT: { filename: 'EFRAG_ENDORSEMENT.pdf', language: 'en' },
|
||||
}
|
||||
@@ -11,6 +11,8 @@ import React, { useState, useEffect, useCallback } from 'react'
|
||||
import Link from 'next/link'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { AIModuleSidebarResponsive } from '@/components/ai/AIModuleSidebar'
|
||||
import { REGULATIONS_IN_RAG } from './rag-constants'
|
||||
import { ChunkBrowserQA } from './components/ChunkBrowserQA'
|
||||
|
||||
// API uses local proxy route to klausur-service
|
||||
const API_PROXY = '/api/legal-corpus'
|
||||
@@ -73,7 +75,7 @@ interface DsfaCorpusStatus {
|
||||
type RegulationCategory = 'regulations' | 'dsfa' | 'nibis' | 'templates'
|
||||
|
||||
// Tab definitions
|
||||
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'data' | 'ingestion' | 'pipeline'
|
||||
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'chunks' | 'data' | 'ingestion' | 'pipeline'
|
||||
|
||||
// Custom document type
|
||||
interface CustomDocument {
|
||||
@@ -1011,8 +1013,264 @@ const REGULATIONS = [
|
||||
keyTopics: ['Bussgeldberechnung', 'Schweregrad', 'Milderungsgruende', 'Bussgeldrahmen'],
|
||||
effectiveDate: '2022'
|
||||
},
|
||||
// =====================================================================
|
||||
// Neu ingestierte EU-Richtlinien (Februar 2026)
|
||||
// =====================================================================
|
||||
{
|
||||
code: 'E_COMMERCE_RL',
|
||||
name: 'E-Commerce-Richtlinie',
|
||||
fullName: 'Richtlinie 2000/31/EG ueber den elektronischen Geschaeftsverkehr',
|
||||
type: 'eu_directive',
|
||||
expected: 30,
|
||||
description: 'EU-Richtlinie ueber den elektronischen Geschaeftsverkehr (E-Commerce). Regelt Herkunftslandprinzip, Informationspflichten, Haftungsprivilegien fuer Vermittler (Mere Conduit, Caching, Hosting).',
|
||||
relevantFor: ['Online-Dienste', 'E-Commerce', 'Hosting-Anbieter', 'Plattformen'],
|
||||
keyTopics: ['Herkunftslandprinzip', 'Haftungsprivileg', 'Informationspflichten', 'Spam-Verbot', 'Vermittlerhaftung'],
|
||||
effectiveDate: '17. Juli 2000'
|
||||
},
|
||||
{
|
||||
code: 'VERBRAUCHERRECHTE_RL',
|
||||
name: 'Verbraucherrechte-Richtlinie',
|
||||
fullName: 'Richtlinie 2011/83/EU ueber die Rechte der Verbraucher',
|
||||
type: 'eu_directive',
|
||||
expected: 25,
|
||||
description: 'EU-weite Harmonisierung der Verbraucherrechte bei Fernabsatz und aussergeschaeftlichen Vertraegen. 14-Tage-Widerrufsrecht, Informationspflichten, digitale Inhalte.',
|
||||
relevantFor: ['Online-Shops', 'E-Commerce', 'Fernabsatz', 'Dienstleister'],
|
||||
keyTopics: ['Widerrufsrecht 14 Tage', 'Informationspflichten', 'Fernabsatzvertraege', 'Digitale Inhalte'],
|
||||
effectiveDate: '13. Juni 2014'
|
||||
},
|
||||
{
|
||||
code: 'DIGITALE_INHALTE_RL',
|
||||
name: 'Digitale-Inhalte-Richtlinie',
|
||||
fullName: 'Richtlinie (EU) 2019/770 ueber digitale Inhalte und Dienstleistungen',
|
||||
type: 'eu_directive',
|
||||
expected: 20,
|
||||
description: 'Gewaehrleistungsrecht fuer digitale Inhalte und Dienstleistungen. Regelt Maengelhaftung, Updates, Vertragsmaessigkeit und Kuendigungsrechte bei digitalen Produkten.',
|
||||
relevantFor: ['SaaS-Anbieter', 'App-Entwickler', 'Cloud-Dienste', 'Streaming-Anbieter', 'Software-Hersteller'],
|
||||
keyTopics: ['Digitale Gewaehrleistung', 'Update-Pflicht', 'Vertragsmaessigkeit', 'Kuendigungsrecht', 'Datenportabilitaet'],
|
||||
effectiveDate: '1. Januar 2022'
|
||||
},
|
||||
{
|
||||
code: 'DMA',
|
||||
name: 'Digital Markets Act',
|
||||
fullName: 'Verordnung (EU) 2022/1925 - Digital Markets Act',
|
||||
type: 'eu_regulation',
|
||||
expected: 50,
|
||||
description: 'Reguliert digitale Gatekeeper-Plattformen. Stellt Verhaltensregeln fuer grosse Plattformen auf (Apple, Google, Meta, Amazon, Microsoft). Verbietet Selbstbevorzugung und erzwingt Interoperabilitaet.',
|
||||
relevantFor: ['Grosse Plattformen', 'App-Stores', 'Suchmaschinen', 'Social Media', 'Messenger-Dienste'],
|
||||
keyTopics: ['Gatekeeper-Pflichten', 'Interoperabilitaet', 'Selbstbevorzugung', 'App-Store-Regeln', 'Datenportabilitaet'],
|
||||
effectiveDate: '2. Mai 2023'
|
||||
},
|
||||
// === Industrie-Compliance (2026-02-28) ===
|
||||
{
|
||||
code: 'MACHINERY_REG',
|
||||
name: 'Maschinenverordnung',
|
||||
fullName: 'Verordnung (EU) 2023/1230 ueber Maschinen (Machinery Regulation)',
|
||||
type: 'eu_regulation',
|
||||
expected: 100,
|
||||
description: 'Loest die alte Maschinenrichtlinie 2006/42/EG ab. Regelt Sicherheitsanforderungen fuer Maschinen und zugehoerige Produkte, CE-Kennzeichnung, Konformitaetsbewertung und Marktaufsicht. Neu: Cybersecurity-Anforderungen fuer vernetzte Maschinen.',
|
||||
relevantFor: ['Maschinenbau', 'Industrie 4.0', 'Automatisierung', 'Hersteller', 'Importeure'],
|
||||
keyTopics: ['CE-Kennzeichnung', 'Konformitaetsbewertung', 'Risikobeurteilung', 'Cybersecurity', 'Betriebsanleitung'],
|
||||
effectiveDate: '20. Januar 2027'
|
||||
},
|
||||
{
|
||||
code: 'BLUE_GUIDE',
|
||||
name: 'Blue Guide',
|
||||
fullName: 'Leitfaden fuer die Umsetzung der EU-Produktvorschriften (Blue Guide 2022)',
|
||||
type: 'eu_guideline',
|
||||
expected: 200,
|
||||
description: 'Umfassender Leitfaden der EU-Kommission zur Umsetzung von Produktvorschriften. Erklaert CE-Kennzeichnung, Konformitaetsbewertungsverfahren, notifizierte Stellen, Marktaufsicht und den New Legislative Framework.',
|
||||
relevantFor: ['Hersteller', 'Importeure', 'Haendler', 'Notifizierte Stellen', 'Marktaufsichtsbehoerden'],
|
||||
keyTopics: ['CE-Kennzeichnung', 'Konformitaetserklaerung', 'Notifizierte Stellen', 'Marktaufsicht', 'New Legislative Framework'],
|
||||
effectiveDate: '29. Juni 2022'
|
||||
},
|
||||
{
|
||||
code: 'ENISA_SECURE_BY_DESIGN',
|
||||
name: 'ENISA Secure by Design',
|
||||
fullName: 'ENISA Secure Software Development Best Practices',
|
||||
type: 'eu_guideline',
|
||||
expected: 50,
|
||||
description: 'ENISA-Leitfaden fuer sichere Softwareentwicklung. Beschreibt Best Practices fuer Security by Design, sichere Entwicklungsprozesse und Schwachstellenmanagement.',
|
||||
relevantFor: ['Softwareentwickler', 'DevOps', 'IT-Sicherheit', 'Produktmanagement'],
|
||||
keyTopics: ['Security by Design', 'SDLC', 'Schwachstellenmanagement', 'Secure Coding', 'Threat Modeling'],
|
||||
effectiveDate: '2023'
|
||||
},
|
||||
{
|
||||
code: 'ENISA_SUPPLY_CHAIN',
|
||||
name: 'ENISA Supply Chain Security',
|
||||
fullName: 'ENISA Threat Landscape for Supply Chain Attacks',
|
||||
type: 'eu_guideline',
|
||||
expected: 50,
|
||||
description: 'ENISA-Analyse der Bedrohungslandschaft fuer Supply-Chain-Angriffe. Beschreibt Angriffsvektoren, Taxonomie und Empfehlungen zur Absicherung von Software-Lieferketten.',
|
||||
relevantFor: ['IT-Sicherheit', 'Beschaffung', 'Softwareentwickler', 'CISO'],
|
||||
keyTopics: ['Supply Chain Security', 'SolarWinds', 'SBOM', 'Lieferantenrisiko', 'Third-Party Risk'],
|
||||
effectiveDate: '2021'
|
||||
},
|
||||
{
|
||||
code: 'NIST_SSDF',
|
||||
name: 'NIST SSDF',
|
||||
fullName: 'NIST SP 800-218 — Secure Software Development Framework (SSDF)',
|
||||
type: 'international_standard',
|
||||
expected: 40,
|
||||
description: 'NIST-Framework fuer sichere Softwareentwicklung. Definiert Praktiken und Aufgaben in vier Gruppen: Prepare, Protect, Produce, Respond. Weit verbreitet als Referenz fuer Software Supply Chain Security.',
|
||||
relevantFor: ['Softwareentwickler', 'DevSecOps', 'IT-Sicherheit', 'Compliance-Manager'],
|
||||
keyTopics: ['SSDF', 'Secure SDLC', 'Software Supply Chain', 'Vulnerability Management', 'Code Review'],
|
||||
effectiveDate: '3. Februar 2022'
|
||||
},
|
||||
{
|
||||
code: 'NIST_CSF_2',
|
||||
name: 'NIST CSF 2.0',
|
||||
fullName: 'NIST Cybersecurity Framework (CSF) 2.0',
|
||||
type: 'international_standard',
|
||||
expected: 50,
|
||||
description: 'Version 2.0 des NIST Cybersecurity Framework. Neue Kernfunktion "Govern" ergaenzt Identify, Protect, Detect, Respond, Recover. Erweitert den Anwendungsbereich ueber kritische Infrastruktur hinaus auf alle Organisationen.',
|
||||
relevantFor: ['CISO', 'IT-Sicherheit', 'Risikomanagement', 'Geschaeftsfuehrung', 'Alle Branchen'],
|
||||
keyTopics: ['Govern', 'Identify', 'Protect', 'Detect', 'Respond', 'Recover', 'Cybersecurity'],
|
||||
effectiveDate: '26. Februar 2024'
|
||||
},
|
||||
{
|
||||
code: 'OECD_AI_PRINCIPLES',
|
||||
name: 'OECD AI Principles',
|
||||
fullName: 'OECD Recommendation on Artificial Intelligence (AI Principles)',
|
||||
type: 'international_standard',
|
||||
expected: 20,
|
||||
description: 'OECD-Empfehlung zu Kuenstlicher Intelligenz. Definiert fuenf Prinzipien fuer verantwortungsvolle KI: Inklusives Wachstum, Menschenzentrierte Werte, Transparenz, Robustheit und Rechenschaftspflicht. Von 46 Laendern angenommen.',
|
||||
relevantFor: ['KI-Entwickler', 'Policy-Maker', 'Ethik-Kommissionen', 'Geschaeftsfuehrung'],
|
||||
keyTopics: ['AI Ethics', 'Transparenz', 'Accountability', 'Trustworthy AI', 'Human-Centered AI'],
|
||||
effectiveDate: '22. Mai 2019'
|
||||
},
|
||||
{
|
||||
code: 'EU_IFRS',
|
||||
name: 'EU-IFRS',
|
||||
fullName: 'Verordnung (EU) 2023/1803 — International Financial Reporting Standards',
|
||||
type: 'eu_regulation',
|
||||
expected: 500,
|
||||
description: 'Konsolidierte Fassung der von der EU uebernommenen IFRS/IAS/IFRIC/SIC. Rechtsverbindlich fuer boersennotierte EU-Unternehmen. Enthalt IFRS 1-17, IAS 1-41, IFRIC 1-23 und SIC 7-32 in der EU-endorsed Fassung (Stand Okt 2023). ACHTUNG: Neuere IASB-Standards sind moeglicherweise noch nicht EU-endorsed.',
|
||||
relevantFor: ['Rechnungswesen', 'Wirtschaftspruefer', 'boersennotierte Unternehmen', 'Finanzberichterstattung', 'CFO'],
|
||||
keyTopics: ['IFRS 16 Leasing', 'IFRS 9 Finanzinstrumente', 'IAS 1 Darstellung', 'IFRS 15 Erloese', 'IFRS 17 Versicherungsvertraege', 'Konsolidierung'],
|
||||
effectiveDate: '16. Oktober 2023'
|
||||
},
|
||||
{
|
||||
code: 'EFRAG_ENDORSEMENT',
|
||||
name: 'EFRAG Endorsement Status',
|
||||
fullName: 'EFRAG EU Endorsement Status Report (Dezember 2025)',
|
||||
type: 'eu_guideline',
|
||||
expected: 30,
|
||||
description: 'Uebersicht des European Financial Reporting Advisory Group (EFRAG) ueber den EU-Endorsement-Stand aller IFRS/IAS-Standards. Zeigt welche Standards von der EU uebernommen wurden und welche noch ausstehend sind. Relevant fuer internationale Ausschreibungen und Compliance-Pruefung.',
|
||||
relevantFor: ['Rechnungswesen', 'Wirtschaftspruefer', 'Compliance Officer', 'internationale Ausschreibungen'],
|
||||
keyTopics: ['EU Endorsement', 'IFRS 18', 'IFRS S1/S2 Sustainability', 'Endorsement Status', 'IASB Updates'],
|
||||
effectiveDate: '18. Dezember 2025'
|
||||
},
|
||||
]
|
||||
|
||||
// Source URLs for original documents (click to view original).
// Lookup table from a regulation code to the public web page of the original text.
// Not every code that appears in the other tables of this file has an entry here,
// so callers must handle a missing key.
|
||||
const REGULATION_SOURCES: Record<string, string> = {
|
||||
// EU regulations/directives (EUR-Lex)
|
||||
GDPR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32016R0679',
|
||||
EPRIVACY: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32002L0058',
|
||||
SCC: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32021D0914',
|
||||
DPF: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023D1795',
|
||||
AIACT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R1689',
|
||||
CRA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R2847',
|
||||
NIS2: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022L2555',
|
||||
EUCSA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019R0881',
|
||||
DATAACT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R2854',
|
||||
DGA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R0868',
|
||||
DSA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R2065',
|
||||
EAA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0882',
|
||||
DSM: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0790',
|
||||
PLD: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024L2853',
|
||||
GPSR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R0988',
|
||||
DORA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R2554',
|
||||
PSD2: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32015L2366',
|
||||
AMLR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32024R1624',
|
||||
MiCA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1114',
|
||||
EHDS: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32025R0327',
|
||||
SCC_FULL_TEXT: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32021D0914',
|
||||
E_COMMERCE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32000L0031',
|
||||
VERBRAUCHERRECHTE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32011L0083',
|
||||
DIGITALE_INHALTE_RL: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32019L0770',
|
||||
DMA: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32022R1925',
|
||||
MACHINERY_REG: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1230',
|
||||
BLUE_GUIDE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:52022XC0629(04)',
|
||||
EU_IFRS: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
|
||||
// EDPB guidelines
|
||||
EDPB_GUIDELINES_2_2019: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-22019-processing-personal-data-under-article-61b_en',
|
||||
EDPB_GUIDELINES_3_2019: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-32019-processing-personal-data-through-video_en',
|
||||
EDPB_GUIDELINES_5_2020: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-052020-consent-under-regulation-2016679_en',
|
||||
EDPB_GUIDELINES_7_2020: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-072020-concepts-controller-and-processor-gdpr_en',
|
||||
// NOTE(review): the key says 1_2022 but the URL slug is "guidelines-042022" — confirm the key/URL pairing.
EDPB_GUIDELINES_1_2022: 'https://www.edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-042022-calculation-administrative-fines-under-gdpr_en',
|
||||
// BSI technical guidelines
|
||||
'BSI-TR-03161-1': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-1.html',
|
||||
'BSI-TR-03161-2': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-2.html',
|
||||
'BSI-TR-03161-3': 'https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Publikationen/TechnischeRichtlinien/TR03161/BSI-TR-03161-3.html',
|
||||
// National data protection laws
|
||||
AT_DSG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001597',
|
||||
BDSG_FULL: 'https://www.gesetze-im-internet.de/bdsg_2018/',
|
||||
CH_DSG: 'https://www.fedlex.admin.ch/eli/cc/2022/491/de',
|
||||
LI_DSG: 'https://www.gesetze.li/konso/2018.272',
|
||||
BE_DPA_LAW: 'https://www.autoriteprotectiondonnees.be/citoyen/la-loi-du-30-juillet-2018',
|
||||
NL_UAVG: 'https://wetten.overheid.nl/BWBR0040940/',
|
||||
FR_CNIL_GUIDE: 'https://www.cnil.fr/fr/rgpd-par-ou-commencer',
|
||||
ES_LOPDGDD: 'https://www.boe.es/buscar/act.php?id=BOE-A-2018-16673',
|
||||
IT_CODICE_PRIVACY: 'https://www.garanteprivacy.it/home/docweb/-/docweb-display/docweb/9042678',
|
||||
IE_DPA_2018: 'https://www.irishstatutebook.ie/eli/2018/act/7/enacted/en/html',
|
||||
UK_DPA_2018: 'https://www.legislation.gov.uk/ukpga/2018/12/contents',
|
||||
UK_GDPR: 'https://www.legislation.gov.uk/eur/2016/679/contents',
|
||||
NO_PERSONOPPLYSNINGSLOVEN: 'https://lovdata.no/dokument/NL/lov/2018-06-15-38',
|
||||
SE_DATASKYDDSLAG: 'https://www.riksdagen.se/sv/dokument-och-lagar/dokument/svensk-forfattningssamling/lag-2018218-med-kompletterande-bestammelser_sfs-2018-218/',
|
||||
FI_TIETOSUOJALAKI: 'https://www.finlex.fi/fi/laki/ajantasa/2018/20181050',
|
||||
PL_UODO: 'https://isap.sejm.gov.pl/isap.nsf/DocDetails.xsp?id=WDU20180001000',
|
||||
CZ_ZOU: 'https://www.zakonyprolidi.cz/cs/2019-110',
|
||||
HU_INFOTV: 'https://net.jogtar.hu/jogszabaly?docid=a1100112.tv',
|
||||
LU_DPA_LAW: 'https://legilux.public.lu/eli/etat/leg/loi/2018/08/01/a686/jo',
|
||||
DK_DATABESKYTTELSESLOVEN: 'https://www.retsinformation.dk/eli/lta/2018/502',
|
||||
// Germany — further laws
|
||||
TDDDG: 'https://www.gesetze-im-internet.de/tdddg/',
|
||||
DE_DDG: 'https://www.gesetze-im-internet.de/ddg/',
|
||||
DE_BGB_AGB: 'https://www.gesetze-im-internet.de/bgb/__305.html',
|
||||
DE_EGBGB: 'https://www.gesetze-im-internet.de/bgbeg/art_246.html',
|
||||
DE_UWG: 'https://www.gesetze-im-internet.de/uwg_2004/',
|
||||
DE_HGB_RET: 'https://www.gesetze-im-internet.de/hgb/__257.html',
|
||||
DE_AO_RET: 'https://www.gesetze-im-internet.de/ao_1977/__147.html',
|
||||
DE_TKG: 'https://www.gesetze-im-internet.de/tkg_2021/',
|
||||
DE_PANGV: 'https://www.gesetze-im-internet.de/pangv_2022/',
|
||||
DE_DLINFOV: 'https://www.gesetze-im-internet.de/dlinfov/',
|
||||
DE_BETRVG: 'https://www.gesetze-im-internet.de/betrvg/__87.html',
|
||||
DE_GESCHGEHG: 'https://www.gesetze-im-internet.de/geschgehg/',
|
||||
DE_BSIG: 'https://www.gesetze-im-internet.de/bsig_2009/',
|
||||
DE_USTG_RET: 'https://www.gesetze-im-internet.de/ustg_1980/__14b.html',
|
||||
// Austria — further laws
|
||||
AT_ECG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20001703',
|
||||
AT_TKG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20007898',
|
||||
AT_KSCHG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10002462',
|
||||
AT_FAGG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=20008783',
|
||||
AT_UGB_RET: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001702',
|
||||
AT_BAO_RET: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10003940',
|
||||
AT_MEDIENG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10000719',
|
||||
AT_ABGB_AGB: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001622',
|
||||
AT_UWG: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10002665',
|
||||
// Switzerland
|
||||
CH_DSV: 'https://www.fedlex.admin.ch/eli/cc/2022/568/de',
|
||||
CH_OR_AGB: 'https://www.fedlex.admin.ch/eli/cc/27/317_321_377/de',
|
||||
CH_UWG: 'https://www.fedlex.admin.ch/eli/cc/1988/223_223_223/de',
|
||||
CH_FMG: 'https://www.fedlex.admin.ch/eli/cc/1997/2187_2187_2187/de',
|
||||
CH_GEBUV: 'https://www.fedlex.admin.ch/eli/cc/2002/249/de',
|
||||
CH_ZERTES: 'https://www.fedlex.admin.ch/eli/cc/2016/752/de',
|
||||
CH_ZGB_PERS: 'https://www.fedlex.admin.ch/eli/cc/24/233_245_233/de',
|
||||
// Industry compliance
|
||||
ENISA_SECURE_BY_DESIGN: 'https://www.enisa.europa.eu/publications/secure-development-best-practices',
|
||||
ENISA_SUPPLY_CHAIN: 'https://www.enisa.europa.eu/publications/threat-landscape-for-supply-chain-attacks',
|
||||
NIST_SSDF: 'https://csrc.nist.gov/pubs/sp/800/218/final',
|
||||
NIST_CSF_2: 'https://www.nist.gov/cyberframework',
|
||||
OECD_AI_PRINCIPLES: 'https://legalinstruments.oecd.org/en/instruments/OECD-LEGAL-0449',
|
||||
// IFRS / EFRAG
|
||||
EU_IFRS_DE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
|
||||
EU_IFRS_EN: 'https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32023R1803',
|
||||
EFRAG_ENDORSEMENT: 'https://www.efrag.org/activities/endorsement-status-report',
|
||||
// Full-text Austrian data protection act (same URL as AT_DSG)
|
||||
AT_DSG_FULL: 'https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001597',
|
||||
}
|
||||
|
||||
// License info for each regulation
|
||||
const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string }> = {
|
||||
GDPR: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk der EU — frei verwendbar' },
|
||||
@@ -1063,6 +1321,18 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
|
||||
EDPB_GUIDELINES_3_2019: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
EDPB_GUIDELINES_5_2020: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
EDPB_GUIDELINES_7_2020: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
// Industrie-Compliance (2026-02-28)
|
||||
MACHINERY_REG: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
BLUE_GUIDE: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Leitfaden — amtliches Werk der Kommission' },
|
||||
ENISA_SECURE_BY_DESIGN: { license: 'CC-BY-4.0', licenseNote: 'ENISA Publication — CC BY 4.0' },
|
||||
ENISA_SUPPLY_CHAIN: { license: 'CC-BY-4.0', licenseNote: 'ENISA Publication — CC BY 4.0' },
|
||||
NIST_SSDF: { license: 'PUBLIC_DOMAIN', licenseNote: 'US Government Work — Public Domain' },
|
||||
NIST_CSF_2: { license: 'PUBLIC_DOMAIN', licenseNote: 'US Government Work — Public Domain' },
|
||||
OECD_AI_PRINCIPLES: { license: 'PUBLIC_DOMAIN', licenseNote: 'OECD Legal Instrument — Reuse Notice' },
|
||||
// EU-IFRS / EFRAG (2026-02-28)
|
||||
EU_IFRS_DE: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
EU_IFRS_EN: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
EFRAG_ENDORSEMENT: { license: 'PUBLIC_DOMAIN', licenseNote: 'EFRAG — oeffentliches Dokument' },
|
||||
// DACH National Laws — Deutschland
|
||||
DE_DDG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Deutsches Bundesgesetz — amtliches Werk (§5 UrhG)' },
|
||||
DE_BGB_AGB: { license: 'PUBLIC_DOMAIN', licenseNote: 'Deutsches Bundesgesetz — amtliches Werk (§5 UrhG)' },
|
||||
@@ -1099,6 +1369,35 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
|
||||
LU_DPA_LAW: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Luxemburg — frei verwendbar' },
|
||||
DK_DATABESKYTTELSESLOVEN: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Daenemark — frei verwendbar' },
|
||||
EDPB_GUIDELINES_1_2022: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
// Neue EU-Richtlinien (Februar 2026 ingestiert)
|
||||
E_COMMERCE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
VERBRAUCHERRECHTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
DIGITALE_INHALTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
DMA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
}
|
||||
|
||||
// REGULATIONS_IN_RAG is imported from ./rag-constants.ts
|
||||
|
||||
// Helper: Check if regulation is in RAG
|
||||
// Returns true only when `code` is an own key of REGULATIONS_IN_RAG.
// The `in` operator also matches inherited Object.prototype members
// (e.g. 'toString', 'constructor'), which would wrongly report such
// strings as ingested regulations, so an own-property check is used.
const isInRag = (code: string): boolean =>
  Object.prototype.hasOwnProperty.call(REGULATIONS_IN_RAG, code)
|
||||
|
||||
// Helper: Get known chunk count for a regulation
|
||||
// Looks up the recorded chunk count for a regulation code.
// Falls back to 0 when the code has no entry or its count is falsy.
const getKnownChunks = (code: string): number => {
  const entry = REGULATIONS_IN_RAG[code]
  const known = entry?.chunks
  return known ? known : 0
}
|
||||
|
||||
// Known collection totals (updated: 2026-03-12)
|
||||
// Note: bp_compliance_datenschutz expanded via edpb-crawler.py (55 EDPB/WP29/EDPS documents).
|
||||
// bp_dsfa_corpus expanded with 20 DSFA Muss-Listen (BfDI + DSK + 16 Bundeslaender).
|
||||
// bp_compliance_gesetze: +5263 Chunks durch Phase H Verbraucherschutz (Run #701, inkl. BDSG/DDG/TKG/HGB/AO Duplikate)
|
||||
// Hard-coded chunk counts per collection, plus two precomputed sums.
const COLLECTION_TOTALS = {
|
||||
bp_compliance_gesetze: 63567, // 58304 + 5263 (Phase H)
|
||||
bp_compliance_ce: 18183,
|
||||
bp_legal_templates: 7689,
|
||||
bp_compliance_datenschutz: 17459,
|
||||
bp_dsfa_corpus: 8666,
|
||||
bp_compliance_recht: 1425,
|
||||
bp_nibis_eh: 7996,
|
||||
total_legal: 81750, // bp_compliance_gesetze + bp_compliance_ce (63567 + 18183)
|
||||
total_all: 124985, // sum of the seven collection counts above
|
||||
}
|
||||
|
||||
// License display labels
|
||||
@@ -1152,7 +1451,10 @@ const INDUSTRY_REGULATION_MAP: Record<string, string[]> = {
|
||||
all: ['GDPR', 'EPRIVACY', 'TDDDG'],
|
||||
health: ['GDPR', 'TDDDG', 'BSI-TR-03161-1', 'BSI-TR-03161-2', 'BSI-TR-03161-3', 'NIS2', 'AIACT', 'PLD', 'EHDS'],
|
||||
finance: ['GDPR', 'TDDDG', 'NIS2', 'EUCSA', 'DSA', 'AIACT', 'DPF', 'DORA', 'PSD2', 'AMLR', 'MiCA'],
|
||||
ecommerce: ['GDPR', 'TDDDG', 'DSA', 'GPSR', 'EAA', 'PLD', 'DPF', 'PSD2'],
|
||||
ecommerce: ['GDPR', 'TDDDG', 'DSA', 'GPSR', 'EAA', 'PLD', 'DPF', 'PSD2',
|
||||
'DE_PANGV', 'DE_VSBG', 'DE_PRODHAFTG', 'DE_VERPACKG', 'DE_ELEKTROG', 'DE_BFSG', 'DE_UWG',
|
||||
'E_COMMERCE_RL', 'VERBRAUCHERRECHTE_RL', 'WARENKAUF_RL', 'KLAUSEL_RL', 'UNLAUTERE_PRAKTIKEN_RL',
|
||||
'PREISANGABEN_RL', 'OMNIBUS_RL', 'DIGITALE_INHALTE_RL'],
|
||||
tech: ['GDPR', 'TDDDG', 'CRA', 'AIACT', 'DPF', 'SCC', 'DATAACT', 'DSM', 'MiCA'],
|
||||
iot: ['GDPR', 'CRA', 'GPSR', 'PLD', 'DATAACT', 'AIACT'],
|
||||
ai: ['GDPR', 'AIACT', 'PLD', 'DSM', 'DATAACT'],
|
||||
@@ -1212,6 +1514,15 @@ const THEMATIC_GROUPS = [
|
||||
regulations: ['EHDS', 'BSI-TR-03161-1', 'BSI-TR-03161-2', 'BSI-TR-03161-3'],
|
||||
description: 'Gesundheitsdatenraum, DiGA-Sicherheit, Patientenrechte'
|
||||
},
|
||||
{
|
||||
id: 'verbraucherschutz',
|
||||
name: 'Verbraucherschutz & E-Commerce',
|
||||
color: 'bg-amber-500',
|
||||
regulations: ['DE_PANGV', 'DE_VSBG', 'DE_PRODHAFTG', 'DE_UWG', 'DE_BFSG',
|
||||
'WARENKAUF_RL', 'KLAUSEL_RL', 'UNLAUTERE_PRAKTIKEN_RL', 'PREISANGABEN_RL',
|
||||
'OMNIBUS_RL', 'E_COMMERCE_RL', 'VERBRAUCHERRECHTE_RL', 'DIGITALE_INHALTE_RL'],
|
||||
description: 'Widerrufsrecht, Preisangaben, Fernabsatz, AGB-Recht, Barrierefreiheit'
|
||||
},
|
||||
]
|
||||
|
||||
// Key overlaps and intersections
|
||||
@@ -1444,6 +1755,8 @@ export default function RAGPage() {
|
||||
const [autoRefresh, setAutoRefresh] = useState(true)
|
||||
const [elapsedTime, setElapsedTime] = useState<string>('')
|
||||
|
||||
// Chunk browser state is now in ChunkBrowserQA component
|
||||
|
||||
// DSFA corpus state
|
||||
const [dsfaSources, setDsfaSources] = useState<DsfaSource[]>([])
|
||||
const [dsfaStatus, setDsfaStatus] = useState<DsfaCorpusStatus | null>(null)
|
||||
@@ -1689,6 +2002,8 @@ export default function RAGPage() {
|
||||
return () => clearInterval(interval)
|
||||
}, [pipelineState?.started_at, pipelineState?.status])
|
||||
|
||||
// Chunk browser functions are now in ChunkBrowserQA component
|
||||
|
||||
const handleSearch = async () => {
|
||||
if (!searchQuery.trim()) return
|
||||
|
||||
@@ -1774,6 +2089,7 @@ export default function RAGPage() {
|
||||
{ id: 'regulations' as TabId, name: 'Regulierungen', icon: '📜' },
|
||||
{ id: 'map' as TabId, name: 'Landkarte', icon: '🗺️' },
|
||||
{ id: 'search' as TabId, name: 'Suche', icon: '🔍' },
|
||||
{ id: 'chunks' as TabId, name: 'Chunk-Browser', icon: '🧩' },
|
||||
{ id: 'data' as TabId, name: 'Daten', icon: '📁' },
|
||||
{ id: 'ingestion' as TabId, name: 'Ingestion', icon: '⚙️' },
|
||||
{ id: 'pipeline' as TabId, name: 'Pipeline', icon: '🔄' },
|
||||
@@ -1804,7 +2120,7 @@ export default function RAGPage() {
|
||||
{/* Page Purpose */}
|
||||
<PagePurpose
|
||||
title="Daten & RAG"
|
||||
purpose="Verwalten und durchsuchen Sie 4 RAG-Collections: Legal Corpus (24 Regulierungen), DSFA Corpus (70+ Quellen inkl. internationaler Datenschutzgesetze), NiBiS EH (Bildungsinhalte) und Legal Templates (Dokumentvorlagen). Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur."
|
||||
purpose={`Verwalten und durchsuchen Sie 7 RAG-Collections mit ${REGULATIONS.length} Regulierungen (${Object.keys(REGULATIONS_IN_RAG).length} im RAG). Legal Corpus, DSFA Corpus (70+ Quellen), NiBiS EH (Bildungsinhalte) und Legal Templates. Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur.`}
|
||||
audience={['DSB', 'Compliance Officer', 'Entwickler']}
|
||||
gdprArticles={['§5 UrhG (Amtliche Werke)', 'Art. 5 DSGVO (Rechenschaftspflicht)']}
|
||||
architecture={{
|
||||
@@ -1826,8 +2142,8 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-blue-600 uppercase mb-1">Legal Corpus</p>
|
||||
<p className="text-2xl font-bold text-slate-900">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500">Chunks · {REGULATIONS.length} Regulierungen</p>
|
||||
<p className="text-2xl font-bold text-slate-900">{COLLECTION_TOTALS.total_legal.toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500">Chunks · {Object.keys(REGULATIONS_IN_RAG).length}/{REGULATIONS.length} im RAG</p>
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-purple-600 uppercase mb-1">DSFA Corpus</p>
|
||||
@@ -1836,12 +2152,12 @@ export default function RAGPage() {
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-emerald-600 uppercase mb-1">NiBiS EH</p>
|
||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.996</p>
|
||||
<p className="text-xs text-slate-500">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-orange-600 uppercase mb-1">Legal Templates</p>
|
||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.689</p>
|
||||
<p className="text-xs text-slate-500">Chunks · Dokumentvorlagen</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1876,8 +2192,8 @@ export default function RAGPage() {
|
||||
className="p-4 rounded-lg border border-blue-200 bg-blue-50 hover:bg-blue-100 transition-colors text-left"
|
||||
>
|
||||
<p className="text-xs font-medium text-blue-600 uppercase">Gesetze & Regulierungen</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500 mt-1">{REGULATIONS.length} Regulierungen (EU, DE, BSI)</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">{COLLECTION_TOTALS.total_legal.toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500 mt-1">{Object.keys(REGULATIONS_IN_RAG).length}/{REGULATIONS.length} im RAG</p>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => { setRegulationCategory('dsfa'); setActiveTab('regulations') }}
|
||||
@@ -1889,12 +2205,12 @@ export default function RAGPage() {
|
||||
</button>
|
||||
<div className="p-4 rounded-lg border border-emerald-200 bg-emerald-50 text-left">
|
||||
<p className="text-xs font-medium text-emerald-600 uppercase">NiBiS EH</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">28.662</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">7.996</p>
|
||||
<p className="text-xs text-slate-500 mt-1">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||
</div>
|
||||
<div className="p-4 rounded-lg border border-orange-200 bg-orange-50 text-left">
|
||||
<p className="text-xs font-medium text-orange-600 uppercase">Legal Templates</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">824</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">7.689</p>
|
||||
<p className="text-xs text-slate-500 mt-1">Chunks · Dokumentvorlagen (VVT, TOM, DSFA)</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1904,12 +2220,13 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-1 md:grid-cols-4 gap-4">
|
||||
{Object.entries(TYPE_LABELS).map(([type, label]) => {
|
||||
const regs = REGULATIONS.filter((r) => r.type === type)
|
||||
const totalChunks = regs.reduce((sum, r) => sum + getRegulationChunks(r.code), 0)
|
||||
const inRagCount = regs.filter((r) => isInRag(r.code)).length
|
||||
const totalChunks = regs.reduce((sum, r) => sum + getKnownChunks(r.code), 0)
|
||||
return (
|
||||
<div key={type} className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<div className="flex items-center gap-2 mb-2">
|
||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[type]}`}>{label}</span>
|
||||
<span className="text-slate-500 text-sm">{regs.length} Dok.</span>
|
||||
<span className="text-slate-500 text-sm">{inRagCount}/{regs.length} im RAG</span>
|
||||
</div>
|
||||
<p className="text-xl font-bold text-slate-900">{totalChunks.toLocaleString()} Chunks</p>
|
||||
</div>
|
||||
@@ -1923,20 +2240,25 @@ export default function RAGPage() {
|
||||
<h3 className="font-semibold text-slate-900">Top Regulierungen (nach Chunks)</h3>
|
||||
</div>
|
||||
<div className="divide-y">
|
||||
{REGULATIONS.sort((a, b) => getRegulationChunks(b.code) - getRegulationChunks(a.code))
|
||||
.slice(0, 5)
|
||||
{[...REGULATIONS].sort((a, b) => getKnownChunks(b.code) - getKnownChunks(a.code))
|
||||
.slice(0, 10)
|
||||
.map((reg) => {
|
||||
const chunks = getRegulationChunks(reg.code)
|
||||
const chunks = getKnownChunks(reg.code)
|
||||
return (
|
||||
<div key={reg.code} className="px-4 py-3 flex items-center justify-between">
|
||||
<div className="flex items-center gap-3">
|
||||
{isInRag(reg.code) ? (
|
||||
<span className="text-green-500 text-sm">✓</span>
|
||||
) : (
|
||||
<span className="text-red-400 text-sm">✗</span>
|
||||
)}
|
||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
||||
{TYPE_LABELS[reg.type]}
|
||||
</span>
|
||||
<span className="font-medium text-slate-900">{reg.name}</span>
|
||||
<span className="text-slate-500 text-sm">({reg.code})</span>
|
||||
</div>
|
||||
<span className="font-bold text-teal-600">{chunks.toLocaleString()} Chunks</span>
|
||||
<span className={`font-bold ${chunks > 0 ? 'text-teal-600' : 'text-slate-300'}`}>{chunks > 0 ? chunks.toLocaleString() + ' Chunks' : '—'}</span>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
@@ -1995,7 +2317,13 @@ export default function RAGPage() {
|
||||
{regulationCategory === 'regulations' && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
||||
<div className="px-4 py-3 border-b bg-slate-50 flex items-center justify-between">
|
||||
<h3 className="font-semibold text-slate-900">Alle {REGULATIONS.length} Regulierungen</h3>
|
||||
<h3 className="font-semibold text-slate-900">
|
||||
Alle {REGULATIONS.length} Regulierungen
|
||||
<span className="ml-2 text-sm font-normal text-slate-500">
|
||||
({REGULATIONS.filter(r => isInRag(r.code)).length} im RAG,{' '}
|
||||
{REGULATIONS.filter(r => !isInRag(r.code)).length} ausstehend)
|
||||
</span>
|
||||
</h3>
|
||||
<button
|
||||
onClick={fetchStatus}
|
||||
className="text-sm text-teal-600 hover:text-teal-700"
|
||||
@@ -2007,6 +2335,7 @@ export default function RAGPage() {
|
||||
<table className="w-full">
|
||||
<thead className="bg-slate-50 border-b">
|
||||
<tr>
|
||||
<th className="px-4 py-3 text-center text-xs font-medium text-slate-500 uppercase w-12">RAG</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Code</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Typ</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Name</th>
|
||||
@@ -2017,17 +2346,10 @@ export default function RAGPage() {
|
||||
</thead>
|
||||
<tbody className="divide-y">
|
||||
{REGULATIONS.map((reg) => {
|
||||
const chunks = getRegulationChunks(reg.code)
|
||||
const ratio = chunks / (reg.expected * 10) // Rough estimate: 10 chunks per requirement
|
||||
let statusColor = 'text-red-500'
|
||||
let statusIcon = '❌'
|
||||
if (ratio > 0.5) {
|
||||
statusColor = 'text-green-500'
|
||||
statusIcon = '✓'
|
||||
} else if (ratio > 0.1) {
|
||||
statusColor = 'text-yellow-500'
|
||||
statusIcon = '⚠'
|
||||
}
|
||||
const chunks = getKnownChunks(reg.code)
|
||||
const inRag = isInRag(reg.code)
|
||||
let statusColor = inRag ? 'text-green-500' : 'text-red-500'
|
||||
let statusIcon = inRag ? '✓' : '❌'
|
||||
const isExpanded = expandedRegulation === reg.code
|
||||
|
||||
return (
|
||||
@@ -2036,6 +2358,13 @@ export default function RAGPage() {
|
||||
onClick={() => setExpandedRegulation(isExpanded ? null : reg.code)}
|
||||
className="hover:bg-slate-50 cursor-pointer transition-colors"
|
||||
>
|
||||
<td className="px-4 py-3 text-center">
|
||||
{isInRag(reg.code) ? (
|
||||
<span className="inline-flex items-center justify-center w-6 h-6 bg-green-100 text-green-600 rounded-full text-xs font-bold" title="Im RAG vorhanden">✓</span>
|
||||
) : (
|
||||
<span className="inline-flex items-center justify-center w-6 h-6 bg-red-50 text-red-400 rounded-full text-xs font-bold" title="Nicht im RAG">✗</span>
|
||||
)}
|
||||
</td>
|
||||
<td className="px-4 py-3 font-mono font-medium text-teal-600">
|
||||
<span className="inline-flex items-center gap-2">
|
||||
<span className={`transform transition-transform ${isExpanded ? 'rotate-90' : ''}`}>▶</span>
|
||||
@@ -2048,13 +2377,20 @@ export default function RAGPage() {
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-3 text-slate-900">{reg.name}</td>
|
||||
<td className="px-4 py-3 text-right font-bold">{chunks.toLocaleString()}</td>
|
||||
<td className="px-4 py-3 text-right font-bold">
|
||||
<span className={chunks > 0 && chunks < 10 && reg.expected >= 10 ? 'text-amber-600' : ''}>
|
||||
{chunks.toLocaleString()}
|
||||
{chunks > 0 && chunks < 10 && reg.expected >= 10 && (
|
||||
<span className="ml-1 inline-block w-4 h-4 text-[10px] leading-4 text-center bg-amber-100 text-amber-700 rounded-full" title="Verdaechtig niedrig — Ingestion pruefen">⚠</span>
|
||||
)}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-3 text-right text-slate-500">{reg.expected}</td>
|
||||
<td className={`px-4 py-3 text-center ${statusColor}`}>{statusIcon}</td>
|
||||
</tr>
|
||||
{isExpanded && (
|
||||
<tr key={`${reg.code}-detail`} className="bg-slate-50">
|
||||
<td colSpan={6} className="px-4 py-4">
|
||||
<td colSpan={7} className="px-4 py-4">
|
||||
<div className="bg-white rounded-lg border border-slate-200 p-4 space-y-3">
|
||||
<div>
|
||||
<h4 className="font-semibold text-slate-900 mb-1">{reg.fullName}</h4>
|
||||
@@ -2094,16 +2430,28 @@ export default function RAGPage() {
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
setSearchQuery(reg.name)
|
||||
setActiveTab('search')
|
||||
}}
|
||||
className="text-teal-600 hover:text-teal-700 font-medium"
|
||||
>
|
||||
In Chunks suchen →
|
||||
</button>
|
||||
<div className="flex items-center gap-3">
|
||||
{REGULATION_SOURCES[reg.code] && (
|
||||
<a
|
||||
href={REGULATION_SOURCES[reg.code]}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
className="text-blue-600 hover:text-blue-700 font-medium"
|
||||
>
|
||||
Originalquelle →
|
||||
</a>
|
||||
)}
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
setActiveTab('chunks')
|
||||
}}
|
||||
className="text-teal-600 hover:text-teal-700 font-medium"
|
||||
>
|
||||
In Chunks suchen →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
@@ -2232,7 +2580,7 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Chunks</p>
|
||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.996</p>
|
||||
</div>
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Vector Size</p>
|
||||
@@ -2264,7 +2612,7 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Chunks</p>
|
||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.689</p>
|
||||
</div>
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Vector Size</p>
|
||||
@@ -2332,20 +2680,28 @@ export default function RAGPage() {
|
||||
</div>
|
||||
</div>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
|
||||
{regs.map((reg) => (
|
||||
{regs.map((reg) => {
|
||||
const regInRag = isInRag(reg.code)
|
||||
return (
|
||||
<div
|
||||
key={reg.code}
|
||||
className="bg-white p-3 rounded-lg border border-slate-200"
|
||||
className={`bg-white p-3 rounded-lg border ${regInRag ? 'border-green-200' : 'border-slate-200'}`}
|
||||
>
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
||||
{reg.code}
|
||||
</span>
|
||||
{regInRag ? (
|
||||
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-green-100 text-green-600 rounded">RAG</span>
|
||||
) : (
|
||||
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-red-50 text-red-400 rounded">✗</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="font-medium text-sm text-slate-900">{reg.name}</div>
|
||||
<div className="text-xs text-slate-500 mt-1 line-clamp-2">{reg.description}</div>
|
||||
</div>
|
||||
))}
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</>
|
||||
)
|
||||
@@ -2372,17 +2728,22 @@ export default function RAGPage() {
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{group.regulations.map((code) => {
|
||||
const reg = REGULATIONS.find(r => r.code === code)
|
||||
const codeInRag = isInRag(code)
|
||||
return (
|
||||
<span
|
||||
key={code}
|
||||
className="px-3 py-1.5 bg-slate-100 rounded-full text-sm font-medium text-slate-700 hover:bg-slate-200 cursor-pointer"
|
||||
className={`px-3 py-1.5 rounded-full text-sm font-medium cursor-pointer ${
|
||||
codeInRag
|
||||
? 'bg-green-100 text-green-700 hover:bg-green-200'
|
||||
: 'bg-slate-100 text-slate-700 hover:bg-slate-200'
|
||||
}`}
|
||||
onClick={() => {
|
||||
setActiveTab('regulations')
|
||||
setExpandedRegulation(code)
|
||||
}}
|
||||
title={reg?.fullName || code}
|
||||
title={`${reg?.fullName || code}${codeInRag ? ' (im RAG)' : ' (nicht im RAG)'}`}
|
||||
>
|
||||
{code}
|
||||
{codeInRag ? '✓ ' : '✗ '}{code}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
@@ -2406,9 +2767,13 @@ export default function RAGPage() {
|
||||
{intersection.regulations.map((code) => (
|
||||
<span
|
||||
key={code}
|
||||
className="px-2 py-0.5 text-xs font-medium bg-teal-100 text-teal-700 rounded"
|
||||
className={`px-2 py-0.5 text-xs font-medium rounded ${
|
||||
isInRag(code)
|
||||
? 'bg-green-100 text-green-700'
|
||||
: 'bg-red-50 text-red-500'
|
||||
}`}
|
||||
>
|
||||
{code}
|
||||
{isInRag(code) ? '✓ ' : '✗ '}{code}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
@@ -2443,8 +2808,15 @@ export default function RAGPage() {
|
||||
<tbody className="divide-y">
|
||||
{REGULATIONS.map((reg) => (
|
||||
<tr key={reg.code} className="hover:bg-slate-50">
|
||||
<td className="px-2 py-2 font-medium text-teal-600 sticky left-0 bg-white">
|
||||
{reg.code}
|
||||
<td className="px-2 py-2 font-medium sticky left-0 bg-white">
|
||||
<span className="flex items-center gap-1">
|
||||
{isInRag(reg.code) ? (
|
||||
<span className="text-green-500 text-[10px]">●</span>
|
||||
) : (
|
||||
<span className="text-red-300 text-[10px]">○</span>
|
||||
)}
|
||||
<span className="text-teal-600">{reg.code}</span>
|
||||
</span>
|
||||
</td>
|
||||
{INDUSTRIES.filter(i => i.id !== 'all').map((industry) => {
|
||||
const applies = INDUSTRY_REGULATION_MAP[industry.id]?.includes(reg.code)
|
||||
@@ -2531,27 +2903,33 @@ export default function RAGPage() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Integrated Regulations */}
|
||||
{/* RAG Coverage Overview */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<span className="text-2xl">✅</span>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">Neu integrierte Regulierungen</h3>
|
||||
<p className="text-sm text-slate-500">Jetzt im RAG-System verfuegbar (Stand: Januar 2025)</p>
|
||||
<h3 className="font-semibold text-slate-900">RAG-Abdeckung ({Object.keys(REGULATIONS_IN_RAG).length} von {REGULATIONS.length} Regulierungen)</h3>
|
||||
<p className="text-sm text-slate-500">Stand: Maerz 2026 — Alle im RAG-System verfuegbaren Regulierungen (inkl. Verbraucherschutz Phase H)</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-2 md:grid-cols-5 gap-3">
|
||||
{INTEGRATED_REGULATIONS.map((reg) => (
|
||||
<div key={reg.code} className="rounded-lg border border-green-200 bg-green-50 p-3 text-center">
|
||||
<span className="px-2 py-1 text-sm font-bold bg-green-100 text-green-700 rounded">
|
||||
{reg.code}
|
||||
</span>
|
||||
<p className="text-xs text-slate-600 mt-2">{reg.name}</p>
|
||||
<p className="text-xs text-green-600 mt-1">Im RAG</p>
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{REGULATIONS.filter(r => isInRag(r.code)).map((reg) => (
|
||||
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-green-100 text-green-700 rounded-full border border-green-200">
|
||||
✓ {reg.code}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
<div className="mt-4 pt-4 border-t border-slate-100">
|
||||
<p className="text-xs font-medium text-slate-500 mb-2">Noch nicht im RAG:</p>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{REGULATIONS.filter(r => !isInRag(r.code)).map((reg) => (
|
||||
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-red-50 text-red-400 rounded-full border border-red-100">
|
||||
✗ {reg.code}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Potential Future Regulations */}
|
||||
@@ -2714,6 +3092,10 @@ export default function RAGPage() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{activeTab === 'chunks' && (
|
||||
<ChunkBrowserQA apiProxy={API_PROXY} />
|
||||
)}
|
||||
|
||||
{activeTab === 'data' && (
|
||||
<div className="space-y-6">
|
||||
{/* Upload Document */}
|
||||
@@ -2899,7 +3281,7 @@ export default function RAGPage() {
|
||||
<span className="flex items-center gap-2 text-teal-600">
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Ingestion laeuft...
|
||||
</span>
|
||||
@@ -2969,7 +3351,7 @@ export default function RAGPage() {
|
||||
{pipelineStarting ? (
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -2988,7 +3370,7 @@ export default function RAGPage() {
|
||||
{pipelineLoading ? (
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -3021,7 +3403,7 @@ export default function RAGPage() {
|
||||
<>
|
||||
<svg className="animate-spin h-5 w-5" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Startet...
|
||||
</>
|
||||
@@ -3058,7 +3440,7 @@ export default function RAGPage() {
|
||||
{pipelineState.status === 'running' && (
|
||||
<svg className="w-6 h-6 text-blue-600 animate-spin" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
)}
|
||||
{pipelineState.status === 'failed' && (
|
||||
|
||||
414
admin-lehrer/app/(admin)/ai/rag/rag-constants.ts
Normal file
414
admin-lehrer/app/(admin)/ai/rag/rag-constants.ts
Normal file
@@ -0,0 +1,414 @@
|
||||
/**
|
||||
* Shared RAG constants used by both page.tsx and ChunkBrowserQA.
|
||||
* REGULATIONS_IN_RAG maps regulation codes to their Qdrant collection, chunk count, and qdrant_id.
|
||||
* The qdrant_id is the actual `regulation_id` value stored in Qdrant payloads.
|
||||
* REGULATION_INFO provides minimal metadata (code, name, type) for all regulations.
|
||||
*/
|
||||
|
||||
// Shape of one REGULATIONS_IN_RAG entry, keyed by regulation code.
export interface RagRegulationEntry {
|
||||
collection: string // Name of the Qdrant collection the regulation is mapped to
|
||||
chunks: number // Chunk count recorded for this regulation
|
||||
qdrant_id: string // The actual regulation_id value in Qdrant payload
|
||||
}
|
||||
|
||||
export const REGULATIONS_IN_RAG: Record<string, RagRegulationEntry> = {
|
||||
// === EU Verordnungen/Richtlinien (bp_compliance_ce) ===
|
||||
GDPR: { collection: 'bp_compliance_ce', chunks: 423, qdrant_id: 'eu_2016_679' },
|
||||
EPRIVACY: { collection: 'bp_compliance_ce', chunks: 134, qdrant_id: 'eu_2002_58' },
|
||||
SCC: { collection: 'bp_compliance_ce', chunks: 330, qdrant_id: 'eu_2021_914' },
|
||||
SCC_FULL_TEXT: { collection: 'bp_compliance_ce', chunks: 330, qdrant_id: 'eu_2021_914' },
|
||||
AIACT: { collection: 'bp_compliance_ce', chunks: 726, qdrant_id: 'eu_2024_1689' },
|
||||
CRA: { collection: 'bp_compliance_ce', chunks: 429, qdrant_id: 'eu_2024_2847' },
|
||||
NIS2: { collection: 'bp_compliance_ce', chunks: 342, qdrant_id: 'eu_2022_2555' },
|
||||
DGA: { collection: 'bp_compliance_ce', chunks: 508, qdrant_id: 'eu_2022_868' },
|
||||
DSA: { collection: 'bp_compliance_ce', chunks: 1106, qdrant_id: 'eu_2022_2065' },
|
||||
PLD: { collection: 'bp_compliance_ce', chunks: 44, qdrant_id: 'eu_1985_374' },
|
||||
E_COMMERCE_RL: { collection: 'bp_compliance_ce', chunks: 197, qdrant_id: 'eu_2000_31' },
|
||||
VERBRAUCHERRECHTE_RL: { collection: 'bp_compliance_ce', chunks: 266, qdrant_id: 'eu_2011_83' },
|
||||
DIGITALE_INHALTE_RL: { collection: 'bp_compliance_ce', chunks: 321, qdrant_id: 'eu_2019_770' },
|
||||
// Verbraucherschutz EU-Richtlinien (Phase H2 Ingestion)
|
||||
WARENKAUF_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'sgd' },
|
||||
KLAUSEL_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'uctd' },
|
||||
UNLAUTERE_PRAKTIKEN_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'ucpd' },
|
||||
PREISANGABEN_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'pid' },
|
||||
OMNIBUS_RL: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'omn' },
|
||||
BATTERIE_VO: { collection: 'bp_compliance_ce', chunks: 0, qdrant_id: 'battvo' },
|
||||
DMA: { collection: 'bp_compliance_ce', chunks: 701, qdrant_id: 'eu_2022_1925' },
|
||||
DPF: { collection: 'bp_compliance_ce', chunks: 2464, qdrant_id: 'dpf' },
|
||||
EUCSA: { collection: 'bp_compliance_ce', chunks: 558, qdrant_id: 'eucsa' },
|
||||
DATAACT: { collection: 'bp_compliance_ce', chunks: 809, qdrant_id: 'dataact' },
|
||||
DORA: { collection: 'bp_compliance_ce', chunks: 823, qdrant_id: 'dora' },
|
||||
PSD2: { collection: 'bp_compliance_ce', chunks: 796, qdrant_id: 'psd2' },
|
||||
AMLR: { collection: 'bp_compliance_ce', chunks: 1182, qdrant_id: 'amlr' },
|
||||
MiCA: { collection: 'bp_compliance_ce', chunks: 1640, qdrant_id: 'mica' },
|
||||
EHDS: { collection: 'bp_compliance_ce', chunks: 1212, qdrant_id: 'ehds' },
|
||||
EAA: { collection: 'bp_compliance_ce', chunks: 433, qdrant_id: 'eaa' },
|
||||
DSM: { collection: 'bp_compliance_ce', chunks: 416, qdrant_id: 'dsm' },
|
||||
GPSR: { collection: 'bp_compliance_ce', chunks: 509, qdrant_id: 'gpsr' },
|
||||
MACHINERY_REG: { collection: 'bp_compliance_ce', chunks: 1271, qdrant_id: 'eu_2023_1230' },
|
||||
BLUE_GUIDE: { collection: 'bp_compliance_ce', chunks: 2271, qdrant_id: 'eu_blue_guide_2022' },
|
||||
EU_IFRS_DE: { collection: 'bp_compliance_ce', chunks: 34388, qdrant_id: 'eu_2023_1803' },
|
||||
EU_IFRS_EN: { collection: 'bp_compliance_ce', chunks: 34388, qdrant_id: 'eu_2023_1803' },
|
||||
// International standards in bp_compliance_ce
|
||||
NIST_SSDF: { collection: 'bp_compliance_ce', chunks: 111, qdrant_id: 'nist_sp_800_218' },
|
||||
NIST_CSF_2: { collection: 'bp_compliance_ce', chunks: 67, qdrant_id: 'nist_csf_2_0' },
|
||||
OECD_AI_PRINCIPLES: { collection: 'bp_compliance_ce', chunks: 34, qdrant_id: 'oecd_ai_principles' },
|
||||
ENISA_SECURE_BY_DESIGN: { collection: 'bp_compliance_ce', chunks: 97, qdrant_id: 'cisa_secure_by_design' },
|
||||
ENISA_SUPPLY_CHAIN: { collection: 'bp_compliance_ce', chunks: 110, qdrant_id: 'enisa_supply_chain_good_practices' },
|
||||
ENISA_THREAT_LANDSCAPE: { collection: 'bp_compliance_ce', chunks: 118, qdrant_id: 'enisa_threat_landscape_supply_chain' },
|
||||
ENISA_ICS_SCADA: { collection: 'bp_compliance_ce', chunks: 195, qdrant_id: 'enisa_ics_scada_dependencies' },
|
||||
ENISA_CYBERSECURITY_2024: { collection: 'bp_compliance_ce', chunks: 22, qdrant_id: 'enisa_cybersecurity_state_2024' },
|
||||
|
||||
// === DE Gesetze (bp_compliance_gesetze) ===
|
||||
TDDDG: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'tdddg_25' },
|
||||
TMG_KOMPLETT: { collection: 'bp_compliance_gesetze', chunks: 108, qdrant_id: 'tmg_komplett' },
|
||||
BDSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 1056, qdrant_id: 'bdsg_2018_komplett' },
|
||||
DE_DDG: { collection: 'bp_compliance_gesetze', chunks: 40, qdrant_id: 'ddg_5' },
|
||||
DE_BGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 4024, qdrant_id: 'bgb_komplett' },
|
||||
DE_EGBGB: { collection: 'bp_compliance_gesetze', chunks: 36, qdrant_id: 'egbgb_widerruf' },
|
||||
DE_HGB_RET: { collection: 'bp_compliance_gesetze', chunks: 11363, qdrant_id: 'hgb_komplett' },
|
||||
DE_AO_RET: { collection: 'bp_compliance_gesetze', chunks: 9669, qdrant_id: 'ao_komplett' },
|
||||
DE_TKG: { collection: 'bp_compliance_gesetze', chunks: 1631, qdrant_id: 'de_tkg' },
|
||||
DE_DLINFOV: { collection: 'bp_compliance_gesetze', chunks: 21, qdrant_id: 'de_dlinfov' },
|
||||
DE_BETRVG: { collection: 'bp_compliance_gesetze', chunks: 498, qdrant_id: 'de_betrvg' },
|
||||
DE_GESCHGEHG: { collection: 'bp_compliance_gesetze', chunks: 63, qdrant_id: 'de_geschgehg' },
|
||||
DE_USTG_RET: { collection: 'bp_compliance_gesetze', chunks: 1071, qdrant_id: 'de_ustg_ret' },
|
||||
DE_URHG: { collection: 'bp_compliance_gesetze', chunks: 626, qdrant_id: 'urhg_komplett' },
|
||||
|
||||
// === DE Verbraucherschutz-Gesetze (bp_compliance_gesetze) — Phase H1 (Run #701) ===
|
||||
DE_PANGV: { collection: 'bp_compliance_gesetze', chunks: 99, qdrant_id: 'pangv' },
|
||||
DE_VSBG: { collection: 'bp_compliance_gesetze', chunks: 113, qdrant_id: 'vsbg' },
|
||||
DE_PRODHAFTG: { collection: 'bp_compliance_gesetze', chunks: 26, qdrant_id: 'prodhaftg' },
|
||||
DE_VERPACKG: { collection: 'bp_compliance_gesetze', chunks: 338, qdrant_id: 'verpackg' },
|
||||
DE_ELEKTROG: { collection: 'bp_compliance_gesetze', chunks: 344, qdrant_id: 'elektrog' },
|
||||
DE_BATTDG: { collection: 'bp_compliance_gesetze', chunks: 307, qdrant_id: 'battdg' },
|
||||
DE_BFSG: { collection: 'bp_compliance_gesetze', chunks: 221, qdrant_id: 'bfsg' },
|
||||
DE_UWG: { collection: 'bp_compliance_gesetze', chunks: 157, qdrant_id: 'uwg' },
|
||||
DE_GEWO: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'gewo' }, // Pending: Re-run noetig (Timeout)
|
||||
// BGB in Teilen (statt 2.7MB komplett)
|
||||
DE_BGB_AGB_305: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_agb' }, // §§ 305-310
|
||||
DE_BGB_FERNABSATZ: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_fernabsatz' }, // §§ 312-312k
|
||||
DE_BGB_KAUFRECHT: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_kaufrecht' }, // §§ 433-480
|
||||
DE_BGB_WIDERRUF: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_widerruf' }, // §§ 355-361
|
||||
DE_BGB_DIGITAL: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'bgb_digital' }, // §§ 327-327u
|
||||
DE_EGBGB_WIDERRUF: { collection: 'bp_compliance_gesetze', chunks: 0, qdrant_id: 'egbgb' }, // Muster-Widerrufsbelehrung
|
||||
|
||||
// === BSI Standards (bp_compliance_gesetze) ===
|
||||
'BSI-TR-03161-1': { collection: 'bp_compliance_gesetze', chunks: 138, qdrant_id: 'bsi_tr_03161_1' },
|
||||
'BSI-TR-03161-2': { collection: 'bp_compliance_gesetze', chunks: 124, qdrant_id: 'bsi_tr_03161_2' },
|
||||
'BSI-TR-03161-3': { collection: 'bp_compliance_gesetze', chunks: 121, qdrant_id: 'bsi_tr_03161_3' },
|
||||
|
||||
// === AT Gesetze (bp_compliance_gesetze) ===
|
||||
AT_DSG: { collection: 'bp_compliance_gesetze', chunks: 805, qdrant_id: 'at_dsg' },
|
||||
AT_DSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 6, qdrant_id: 'at_dsg_full' },
|
||||
AT_ECG: { collection: 'bp_compliance_gesetze', chunks: 120, qdrant_id: 'at_ecg' },
|
||||
AT_TKG: { collection: 'bp_compliance_gesetze', chunks: 4348, qdrant_id: 'at_tkg' },
|
||||
AT_KSCHG: { collection: 'bp_compliance_gesetze', chunks: 402, qdrant_id: 'at_kschg' },
|
||||
AT_FAGG: { collection: 'bp_compliance_gesetze', chunks: 2, qdrant_id: 'at_fagg' },
|
||||
AT_UGB_RET: { collection: 'bp_compliance_gesetze', chunks: 2828, qdrant_id: 'at_ugb_ret' },
|
||||
AT_BAO_RET: { collection: 'bp_compliance_gesetze', chunks: 2246, qdrant_id: 'at_bao_ret' },
|
||||
AT_MEDIENG: { collection: 'bp_compliance_gesetze', chunks: 571, qdrant_id: 'at_medieng' },
|
||||
AT_ABGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 2521, qdrant_id: 'at_abgb_agb' },
|
||||
AT_UWG: { collection: 'bp_compliance_gesetze', chunks: 403, qdrant_id: 'at_uwg' },
|
||||
|
||||
// === CH Gesetze (bp_compliance_gesetze) ===
|
||||
CH_DSG: { collection: 'bp_compliance_gesetze', chunks: 180, qdrant_id: 'ch_revdsg' },
|
||||
CH_DSV: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_dsv' },
|
||||
CH_OR_AGB: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_or_agb' },
|
||||
CH_GEBUV: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_gebuv' },
|
||||
CH_ZERTES: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_zertes' },
|
||||
CH_ZGB_PERS: { collection: 'bp_compliance_gesetze', chunks: 5, qdrant_id: 'ch_zgb_pers' },
|
||||
|
||||
// === Nationale Gesetze (andere EU) in bp_compliance_gesetze ===
|
||||
ES_LOPDGDD: { collection: 'bp_compliance_gesetze', chunks: 782, qdrant_id: 'es_lopdgdd' },
|
||||
IT_CODICE_PRIVACY: { collection: 'bp_compliance_gesetze', chunks: 59, qdrant_id: 'it_codice_privacy' },
|
||||
NL_UAVG: { collection: 'bp_compliance_gesetze', chunks: 523, qdrant_id: 'nl_uavg' },
|
||||
FR_CNIL_GUIDE: { collection: 'bp_compliance_gesetze', chunks: 562, qdrant_id: 'fr_loi_informatique' },
|
||||
IE_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 64, qdrant_id: 'ie_dpa_2018' },
|
||||
UK_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 156, qdrant_id: 'uk_dpa_2018' },
|
||||
UK_GDPR: { collection: 'bp_compliance_gesetze', chunks: 45, qdrant_id: 'uk_gdpr' },
|
||||
NO_PERSONOPPLYSNINGSLOVEN: { collection: 'bp_compliance_gesetze', chunks: 41, qdrant_id: 'no_pol' },
|
||||
SE_DATASKYDDSLAG: { collection: 'bp_compliance_gesetze', chunks: 56, qdrant_id: 'se_dataskyddslag' },
|
||||
PL_UODO: { collection: 'bp_compliance_gesetze', chunks: 39, qdrant_id: 'pl_ustawa' },
|
||||
CZ_ZOU: { collection: 'bp_compliance_gesetze', chunks: 238, qdrant_id: 'cz_zakon' },
|
||||
HU_INFOTV: { collection: 'bp_compliance_gesetze', chunks: 747, qdrant_id: 'hu_info_tv' },
|
||||
LU_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 2, qdrant_id: 'lu_dpa_law' },
|
||||
|
||||
// === EDPB Guidelines (bp_compliance_datenschutz) — alt (ingest-legal-corpus.sh) ===
|
||||
EDPB_GUIDELINES_5_2020: { collection: 'bp_compliance_datenschutz', chunks: 236, qdrant_id: 'edpb_05_2020' },
|
||||
EDPB_GUIDELINES_7_2020: { collection: 'bp_compliance_datenschutz', chunks: 347, qdrant_id: 'edpb_guidelines_7_2020' },
|
||||
EDPB_GUIDELINES_1_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_01_2020' },
|
||||
EDPB_GUIDELINES_1_2022: { collection: 'bp_compliance_datenschutz', chunks: 510, qdrant_id: 'edpb_01_2022' },
|
||||
EDPB_GUIDELINES_2_2023: { collection: 'bp_compliance_datenschutz', chunks: 94, qdrant_id: 'edpb_02_2023' },
|
||||
EDPB_GUIDELINES_2_2024: { collection: 'bp_compliance_datenschutz', chunks: 79, qdrant_id: 'edpb_02_2024' },
|
||||
EDPB_GUIDELINES_4_2019: { collection: 'bp_compliance_datenschutz', chunks: 202, qdrant_id: 'edpb_04_2019' },
|
||||
EDPB_GUIDELINES_9_2022: { collection: 'bp_compliance_datenschutz', chunks: 243, qdrant_id: 'edpb_09_2022' },
|
||||
EDPB_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 29, qdrant_id: 'edpb_dpia_list' },
|
||||
EDPB_LEGITIMATE_INTEREST: { collection: 'bp_compliance_datenschutz', chunks: 672, qdrant_id: 'edpb_legitimate_interest' },
|
||||
EDPS_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 73, qdrant_id: 'edps_dpia_list' },
|
||||
|
||||
// === EDPB Guidelines (bp_compliance_datenschutz) — neu (edpb-crawler.py) ===
|
||||
EDPB_ACCESS_01_2022: { collection: 'bp_compliance_datenschutz', chunks: 1020, qdrant_id: 'edpb_access_01_2022' },
|
||||
EDPB_ARTICLE48_02_2024: { collection: 'bp_compliance_datenschutz', chunks: 158, qdrant_id: 'edpb_article48_02_2024' },
|
||||
EDPB_BCR_01_2022: { collection: 'bp_compliance_datenschutz', chunks: 384, qdrant_id: 'edpb_bcr_01_2022' },
|
||||
EDPB_BREACH_09_2022: { collection: 'bp_compliance_datenschutz', chunks: 486, qdrant_id: 'edpb_breach_09_2022' },
|
||||
EDPB_CERTIFICATION_01_2018: { collection: 'bp_compliance_datenschutz', chunks: 160, qdrant_id: 'edpb_certification_01_2018' },
|
||||
EDPB_CERTIFICATION_01_2019: { collection: 'bp_compliance_datenschutz', chunks: 160, qdrant_id: 'edpb_certification_01_2019' },
|
||||
EDPB_CONNECTED_VEHICLES_01_2020: { collection: 'bp_compliance_datenschutz', chunks: 482, qdrant_id: 'edpb_connected_vehicles_01_2020' },
|
||||
EDPB_CONSENT_05_2020: { collection: 'bp_compliance_datenschutz', chunks: 247, qdrant_id: 'edpb_consent_05_2020' },
|
||||
EDPB_CONTROLLER_PROCESSOR_07_2020: { collection: 'bp_compliance_datenschutz', chunks: 694, qdrant_id: 'edpb_controller_processor_07_2020' },
|
||||
EDPB_COOKIE_TASKFORCE_2023: { collection: 'bp_compliance_datenschutz', chunks: 78, qdrant_id: 'edpb_cookie_taskforce_2023' },
|
||||
EDPB_DARK_PATTERNS_03_2022: { collection: 'bp_compliance_datenschutz', chunks: 413, qdrant_id: 'edpb_dark_patterns_03_2022' },
|
||||
EDPB_DPBD_04_2019: { collection: 'bp_compliance_datenschutz', chunks: 216, qdrant_id: 'edpb_dpbd_04_2019' },
|
||||
EDPB_DPIA_LIST_RECOMMENDATION: { collection: 'bp_compliance_datenschutz', chunks: 31, qdrant_id: 'edpb_dpia_list_recommendation' },
|
||||
EDPB_EPRIVACY_02_2023: { collection: 'bp_compliance_datenschutz', chunks: 188, qdrant_id: 'edpb_eprivacy_02_2023' },
|
||||
EDPB_FACIAL_RECOGNITION_05_2022: { collection: 'bp_compliance_datenschutz', chunks: 396, qdrant_id: 'edpb_facial_recognition_05_2022' },
|
||||
EDPB_FINES_04_2022: { collection: 'bp_compliance_datenschutz', chunks: 346, qdrant_id: 'edpb_fines_04_2022' },
|
||||
EDPB_GEOLOCATION_04_2020: { collection: 'bp_compliance_datenschutz', chunks: 108, qdrant_id: 'edpb_geolocation_04_2020' },
|
||||
EDPB_GL_2_2019: { collection: 'bp_compliance_datenschutz', chunks: 107, qdrant_id: 'edpb_gl_2_2019' },
|
||||
EDPB_HEALTH_DATA_03_2020: { collection: 'bp_compliance_datenschutz', chunks: 182, qdrant_id: 'edpb_health_data_03_2020' },
|
||||
EDPB_LEGAL_BASIS_02_2019: { collection: 'bp_compliance_datenschutz', chunks: 107, qdrant_id: 'edpb_legal_basis_02_2019' },
|
||||
EDPB_LEGITIMATE_INTEREST_01_2024: { collection: 'bp_compliance_datenschutz', chunks: 336, qdrant_id: 'edpb_legitimate_interest_01_2024' },
|
||||
EDPB_RTBF_05_2019: { collection: 'bp_compliance_datenschutz', chunks: 111, qdrant_id: 'edpb_rtbf_05_2019' },
|
||||
EDPB_RRO_09_2020: { collection: 'bp_compliance_datenschutz', chunks: 82, qdrant_id: 'edpb_rro_09_2020' },
|
||||
EDPB_SOCIAL_MEDIA_08_2020: { collection: 'bp_compliance_datenschutz', chunks: 333, qdrant_id: 'edpb_social_media_08_2020' },
|
||||
EDPB_TRANSFERS_01_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_transfers_01_2020' },
|
||||
EDPB_TRANSFERS_07_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_transfers_07_2020' },
|
||||
EDPB_VIDEO_03_2019: { collection: 'bp_compliance_datenschutz', chunks: 204, qdrant_id: 'edpb_video_03_2019' },
|
||||
EDPB_VVA_02_2021: { collection: 'bp_compliance_datenschutz', chunks: 273, qdrant_id: 'edpb_vva_02_2021' },
|
||||
|
||||
// === EDPS Guidance (bp_compliance_datenschutz) ===
|
||||
EDPS_DIGITAL_ETHICS_2018: { collection: 'bp_compliance_datenschutz', chunks: 404, qdrant_id: 'edps_digital_ethics_2018' },
|
||||
EDPS_GENAI_ORIENTATIONS_2024: { collection: 'bp_compliance_datenschutz', chunks: 274, qdrant_id: 'edps_genai_orientations_2024' },
|
||||
|
||||
// === WP29 Endorsed (bp_compliance_datenschutz) ===
|
||||
WP242_PORTABILITY: { collection: 'bp_compliance_datenschutz', chunks: 141, qdrant_id: 'wp242_portability' },
|
||||
WP243_DPO: { collection: 'bp_compliance_datenschutz', chunks: 54, qdrant_id: 'wp243_dpo' },
|
||||
WP244_PROFILING: { collection: 'bp_compliance_datenschutz', chunks: 247, qdrant_id: 'wp244_profiling' },
|
||||
WP248_DPIA: { collection: 'bp_compliance_datenschutz', chunks: 288, qdrant_id: 'wp248_dpia' },
|
||||
WP250_BREACH: { collection: 'bp_compliance_datenschutz', chunks: 201, qdrant_id: 'wp250_breach' },
|
||||
WP259_CONSENT: { collection: 'bp_compliance_datenschutz', chunks: 496, qdrant_id: 'wp259_consent' },
|
||||
WP260_TRANSPARENCY: { collection: 'bp_compliance_datenschutz', chunks: 558, qdrant_id: 'wp260_transparency' },
|
||||
|
||||
// === DSFA Muss-Listen (bp_dsfa_corpus) ===
|
||||
DSFA_BFDI_BUND: { collection: 'bp_dsfa_corpus', chunks: 17, qdrant_id: 'dsfa_bfdi_bund' },
|
||||
DSFA_DSK_GEMEINSAM: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_dsk_gemeinsam' },
|
||||
DSFA_BW: { collection: 'bp_dsfa_corpus', chunks: 41, qdrant_id: 'dsfa_bw' },
|
||||
DSFA_BY: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_by' },
|
||||
DSFA_BE_OE: { collection: 'bp_dsfa_corpus', chunks: 31, qdrant_id: 'dsfa_be_oe' },
|
||||
DSFA_BE_NOE: { collection: 'bp_dsfa_corpus', chunks: 48, qdrant_id: 'dsfa_be_noe' },
|
||||
DSFA_BB_OE: { collection: 'bp_dsfa_corpus', chunks: 43, qdrant_id: 'dsfa_bb_oe' },
|
||||
DSFA_BB_NOE: { collection: 'bp_dsfa_corpus', chunks: 53, qdrant_id: 'dsfa_bb_noe' },
|
||||
DSFA_HB: { collection: 'bp_dsfa_corpus', chunks: 44, qdrant_id: 'dsfa_hb' },
|
||||
DSFA_HH_OE: { collection: 'bp_dsfa_corpus', chunks: 58, qdrant_id: 'dsfa_hh_oe' },
|
||||
DSFA_HH_NOE: { collection: 'bp_dsfa_corpus', chunks: 53, qdrant_id: 'dsfa_hh_noe' },
|
||||
DSFA_MV: { collection: 'bp_dsfa_corpus', chunks: 32, qdrant_id: 'dsfa_mv' },
|
||||
DSFA_NI: { collection: 'bp_dsfa_corpus', chunks: 47, qdrant_id: 'dsfa_ni' },
|
||||
DSFA_RP: { collection: 'bp_dsfa_corpus', chunks: 25, qdrant_id: 'dsfa_rp' },
|
||||
DSFA_SL: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_sl' },
|
||||
DSFA_SN: { collection: 'bp_dsfa_corpus', chunks: 18, qdrant_id: 'dsfa_sn' },
|
||||
DSFA_ST_OE: { collection: 'bp_dsfa_corpus', chunks: 57, qdrant_id: 'dsfa_st_oe' },
|
||||
DSFA_ST_NOE: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_st_noe' },
|
||||
DSFA_SH: { collection: 'bp_dsfa_corpus', chunks: 44, qdrant_id: 'dsfa_sh' },
|
||||
DSFA_TH: { collection: 'bp_dsfa_corpus', chunks: 48, qdrant_id: 'dsfa_th' },
|
||||
}
|
||||
|
||||
/**
 * Compact description of a single regulation, used for sidebar display.
 *
 * Only the fields needed for a short list entry live here; the full
 * REGULATIONS array with descriptions remains in page.tsx.
 */
export interface RegulationInfo {
  /** Regulation code, e.g. 'GDPR' or 'DE_BGB_AGB'. */
  code: string
  /** Short display label, e.g. 'DSGVO'. */
  name: string
  /** Category tag, e.g. 'eu_regulation', 'de_law' or 'dsfa_mussliste'. */
  type: string
}
|
||||
|
||||
/**
 * Sidebar entries, one per regulation, in display/declaration order.
 *
 * Each entry carries only code, short name and category tag (see RegulationInfo).
 * Entries are grouped by source; the group comments are informational only.
 */
export const REGULATION_INFO: RegulationInfo[] = [
  // EU regulations and directives
  { code: 'GDPR', name: 'DSGVO', type: 'eu_regulation' },
  { code: 'EPRIVACY', name: 'ePrivacy-Richtlinie', type: 'eu_directive' },
  { code: 'SCC', name: 'Standardvertragsklauseln', type: 'eu_regulation' },
  { code: 'SCC_FULL_TEXT', name: 'SCC Volltext', type: 'eu_regulation' },
  { code: 'DPF', name: 'EU-US Data Privacy Framework', type: 'eu_regulation' },
  { code: 'AIACT', name: 'EU AI Act', type: 'eu_regulation' },
  { code: 'CRA', name: 'Cyber Resilience Act', type: 'eu_regulation' },
  { code: 'NIS2', name: 'NIS2-Richtlinie', type: 'eu_directive' },
  { code: 'EUCSA', name: 'EU Cybersecurity Act', type: 'eu_regulation' },
  { code: 'DATAACT', name: 'Data Act', type: 'eu_regulation' },
  { code: 'DGA', name: 'Data Governance Act', type: 'eu_regulation' },
  { code: 'DSA', name: 'Digital Services Act', type: 'eu_regulation' },
  { code: 'DMA', name: 'Digital Markets Act', type: 'eu_regulation' },
  { code: 'EAA', name: 'European Accessibility Act', type: 'eu_directive' },
  { code: 'DSM', name: 'DSM-Urheberrechtsrichtlinie', type: 'eu_directive' },
  { code: 'PLD', name: 'Produkthaftungsrichtlinie', type: 'eu_directive' },
  { code: 'GPSR', name: 'General Product Safety', type: 'eu_regulation' },
  { code: 'WARENKAUF_RL', name: 'Warenkauf-RL', type: 'eu_directive' },
  { code: 'KLAUSEL_RL', name: 'Klausel-RL', type: 'eu_directive' },
  { code: 'UNLAUTERE_PRAKTIKEN_RL', name: 'UGP-RL', type: 'eu_directive' },
  { code: 'PREISANGABEN_RL', name: 'Preisangaben-RL', type: 'eu_directive' },
  { code: 'OMNIBUS_RL', name: 'Omnibus-RL', type: 'eu_directive' },
  { code: 'BATTERIE_VO', name: 'Batterieverordnung', type: 'eu_regulation' },
  { code: 'E_COMMERCE_RL', name: 'E-Commerce-Richtlinie', type: 'eu_directive' },
  { code: 'VERBRAUCHERRECHTE_RL', name: 'Verbraucherrechte-RL', type: 'eu_directive' },
  { code: 'DIGITALE_INHALTE_RL', name: 'Digitale-Inhalte-RL', type: 'eu_directive' },

  // Financial (this group also holds the EHDS, machinery, Blue Guide and IFRS entries)
  { code: 'DORA', name: 'DORA', type: 'eu_regulation' },
  { code: 'PSD2', name: 'PSD2', type: 'eu_directive' },
  { code: 'AMLR', name: 'AML-Verordnung', type: 'eu_regulation' },
  { code: 'MiCA', name: 'MiCA', type: 'eu_regulation' },
  { code: 'EHDS', name: 'EHDS', type: 'eu_regulation' },
  { code: 'MACHINERY_REG', name: 'Maschinenverordnung', type: 'eu_regulation' },
  { code: 'BLUE_GUIDE', name: 'Blue Guide', type: 'eu_regulation' },
  { code: 'EU_IFRS_DE', name: 'EU-IFRS (DE)', type: 'eu_regulation' },
  { code: 'EU_IFRS_EN', name: 'EU-IFRS (EN)', type: 'eu_regulation' },

  // German laws
  { code: 'TDDDG', name: 'TDDDG', type: 'de_law' },
  { code: 'TMG_KOMPLETT', name: 'TMG', type: 'de_law' },
  { code: 'BDSG_FULL', name: 'BDSG', type: 'de_law' },
  { code: 'DE_DDG', name: 'DDG', type: 'de_law' },
  { code: 'DE_BGB_AGB', name: 'BGB/AGB', type: 'de_law' },
  { code: 'DE_EGBGB', name: 'EGBGB', type: 'de_law' },
  { code: 'DE_HGB_RET', name: 'HGB', type: 'de_law' },
  { code: 'DE_AO_RET', name: 'AO', type: 'de_law' },
  { code: 'DE_TKG', name: 'TKG', type: 'de_law' },
  { code: 'DE_DLINFOV', name: 'DL-InfoV', type: 'de_law' },
  { code: 'DE_BETRVG', name: 'BetrVG', type: 'de_law' },
  { code: 'DE_GESCHGEHG', name: 'GeschGehG', type: 'de_law' },
  { code: 'DE_USTG_RET', name: 'UStG', type: 'de_law' },
  { code: 'DE_URHG', name: 'UrhG', type: 'de_law' },

  // German consumer-protection laws
  { code: 'DE_PANGV', name: 'PAngV', type: 'de_law' },
  { code: 'DE_VSBG', name: 'VSBG', type: 'de_law' },
  { code: 'DE_PRODHAFTG', name: 'ProdHaftG', type: 'de_law' },
  { code: 'DE_VERPACKG', name: 'VerpackG', type: 'de_law' },
  { code: 'DE_ELEKTROG', name: 'ElektroG', type: 'de_law' },
  { code: 'DE_BATTDG', name: 'BattDG', type: 'de_law' },
  { code: 'DE_BFSG', name: 'BFSG', type: 'de_law' },
  { code: 'DE_UWG', name: 'UWG', type: 'de_law' },
  { code: 'DE_GEWO', name: 'GewO', type: 'de_law' },
  { code: 'DE_BGB_AGB_305', name: 'BGB AGB-Recht §§305-310', type: 'de_law' },
  { code: 'DE_BGB_FERNABSATZ', name: 'BGB Fernabsatz §§312-312k', type: 'de_law' },
  { code: 'DE_BGB_KAUFRECHT', name: 'BGB Kaufrecht §§433-480', type: 'de_law' },
  { code: 'DE_BGB_WIDERRUF', name: 'BGB Widerruf §§355-361', type: 'de_law' },
  { code: 'DE_BGB_DIGITAL', name: 'BGB Digital §§327-327u', type: 'de_law' },
  { code: 'DE_EGBGB_WIDERRUF', name: 'EGBGB Widerrufsbelehrung', type: 'de_law' },

  // BSI standards
  { code: 'BSI-TR-03161-1', name: 'BSI-TR Teil 1', type: 'bsi_standard' },
  { code: 'BSI-TR-03161-2', name: 'BSI-TR Teil 2', type: 'bsi_standard' },
  { code: 'BSI-TR-03161-3', name: 'BSI-TR Teil 3', type: 'bsi_standard' },

  // Austrian laws
  { code: 'AT_DSG', name: 'DSG Oesterreich', type: 'at_law' },
  { code: 'AT_DSG_FULL', name: 'DSG Volltext', type: 'at_law' },
  { code: 'AT_ECG', name: 'ECG', type: 'at_law' },
  { code: 'AT_TKG', name: 'TKG AT', type: 'at_law' },
  { code: 'AT_KSCHG', name: 'KSchG', type: 'at_law' },
  { code: 'AT_FAGG', name: 'FAGG', type: 'at_law' },
  { code: 'AT_UGB_RET', name: 'UGB', type: 'at_law' },
  { code: 'AT_BAO_RET', name: 'BAO', type: 'at_law' },
  { code: 'AT_MEDIENG', name: 'MedienG', type: 'at_law' },
  { code: 'AT_ABGB_AGB', name: 'ABGB/AGB', type: 'at_law' },
  { code: 'AT_UWG', name: 'UWG AT', type: 'at_law' },

  // Swiss laws
  { code: 'CH_DSG', name: 'DSG Schweiz', type: 'ch_law' },
  { code: 'CH_DSV', name: 'DSV', type: 'ch_law' },
  { code: 'CH_OR_AGB', name: 'OR/AGB', type: 'ch_law' },
  { code: 'CH_GEBUV', name: 'GeBuV', type: 'ch_law' },
  { code: 'CH_ZERTES', name: 'ZertES', type: 'ch_law' },
  { code: 'CH_ZGB_PERS', name: 'ZGB', type: 'ch_law' },

  // Other national laws
  { code: 'ES_LOPDGDD', name: 'LOPDGDD Spanien', type: 'national_law' },
  { code: 'IT_CODICE_PRIVACY', name: 'Codice Privacy Italien', type: 'national_law' },
  { code: 'NL_UAVG', name: 'UAVG Niederlande', type: 'national_law' },
  { code: 'FR_CNIL_GUIDE', name: 'CNIL Guide RGPD', type: 'national_law' },
  { code: 'IE_DPA_2018', name: 'DPA 2018 Ireland', type: 'national_law' },
  { code: 'UK_DPA_2018', name: 'DPA 2018 UK', type: 'national_law' },
  { code: 'UK_GDPR', name: 'UK GDPR', type: 'national_law' },
  { code: 'NO_PERSONOPPLYSNINGSLOVEN', name: 'Personopplysningsloven', type: 'national_law' },
  { code: 'SE_DATASKYDDSLAG', name: 'Dataskyddslag Schweden', type: 'national_law' },
  { code: 'PL_UODO', name: 'UODO Polen', type: 'national_law' },
  { code: 'CZ_ZOU', name: 'Zakon Tschechien', type: 'national_law' },
  { code: 'HU_INFOTV', name: 'Infotv. Ungarn', type: 'national_law' },
  { code: 'LU_DPA_LAW', name: 'Datenschutzgesetz Luxemburg', type: 'national_law' },

  // EDPB guidelines (old set)
  { code: 'EDPB_GUIDELINES_5_2020', name: 'EDPB GL Einwilligung', type: 'eu_guideline' },
  { code: 'EDPB_GUIDELINES_7_2020', name: 'EDPB GL C/P Konzepte', type: 'eu_guideline' },
  { code: 'EDPB_GUIDELINES_1_2020', name: 'EDPB GL Fahrzeuge', type: 'eu_guideline' },
  { code: 'EDPB_GUIDELINES_1_2022', name: 'EDPB GL Bussgelder', type: 'eu_guideline' },
  { code: 'EDPB_GUIDELINES_2_2023', name: 'EDPB GL Art. 37 Scope', type: 'eu_guideline' },
  { code: 'EDPB_GUIDELINES_2_2024', name: 'EDPB GL Art. 48', type: 'eu_guideline' },
  { code: 'EDPB_GUIDELINES_4_2019', name: 'EDPB GL Art. 25 DPbD', type: 'eu_guideline' },
  { code: 'EDPB_GUIDELINES_9_2022', name: 'EDPB GL Datenschutzverletzung', type: 'eu_guideline' },
  { code: 'EDPB_DPIA_LIST', name: 'EDPB DPIA-Liste', type: 'eu_guideline' },
  { code: 'EDPB_LEGITIMATE_INTEREST', name: 'EDPB Berecht. Interesse', type: 'eu_guideline' },
  { code: 'EDPS_DPIA_LIST', name: 'EDPS DPIA-Liste', type: 'eu_guideline' },

  // EDPB guidelines (new set, from the crawler)
  { code: 'EDPB_ACCESS_01_2022', name: 'EDPB GL Auskunftsrecht', type: 'eu_guideline' },
  { code: 'EDPB_ARTICLE48_02_2024', name: 'EDPB GL Art. 48', type: 'eu_guideline' },
  { code: 'EDPB_BCR_01_2022', name: 'EDPB GL BCR', type: 'eu_guideline' },
  { code: 'EDPB_BREACH_09_2022', name: 'EDPB GL Datenpannen', type: 'eu_guideline' },
  { code: 'EDPB_CERTIFICATION_01_2018', name: 'EDPB GL Zertifizierung', type: 'eu_guideline' },
  { code: 'EDPB_CERTIFICATION_01_2019', name: 'EDPB GL Zertifizierung 2019', type: 'eu_guideline' },
  { code: 'EDPB_CONNECTED_VEHICLES_01_2020', name: 'EDPB GL Vernetzte Fahrzeuge', type: 'eu_guideline' },
  { code: 'EDPB_CONSENT_05_2020', name: 'EDPB GL Consent', type: 'eu_guideline' },
  { code: 'EDPB_CONTROLLER_PROCESSOR_07_2020', name: 'EDPB GL Verantwortliche/Auftragsverarbeiter', type: 'eu_guideline' },
  { code: 'EDPB_COOKIE_TASKFORCE_2023', name: 'EDPB Cookie-Banner Taskforce', type: 'eu_guideline' },
  { code: 'EDPB_DARK_PATTERNS_03_2022', name: 'EDPB GL Dark Patterns', type: 'eu_guideline' },
  { code: 'EDPB_DPBD_04_2019', name: 'EDPB GL Data Protection by Design', type: 'eu_guideline' },
  { code: 'EDPB_DPIA_LIST_RECOMMENDATION', name: 'EDPB DPIA-Empfehlung', type: 'eu_guideline' },
  { code: 'EDPB_EPRIVACY_02_2023', name: 'EDPB GL ePrivacy', type: 'eu_guideline' },
  { code: 'EDPB_FACIAL_RECOGNITION_05_2022', name: 'EDPB GL Gesichtserkennung', type: 'eu_guideline' },
  { code: 'EDPB_FINES_04_2022', name: 'EDPB GL Bussgeldberechnung', type: 'eu_guideline' },
  { code: 'EDPB_GEOLOCATION_04_2020', name: 'EDPB GL Geolokalisierung', type: 'eu_guideline' },
  { code: 'EDPB_GL_2_2019', name: 'EDPB GL Video-Ueberwachung', type: 'eu_guideline' },
  { code: 'EDPB_HEALTH_DATA_03_2020', name: 'EDPB GL Gesundheitsdaten', type: 'eu_guideline' },
  { code: 'EDPB_LEGAL_BASIS_02_2019', name: 'EDPB GL Rechtsgrundlage Art. 6(1)(b)', type: 'eu_guideline' },
  { code: 'EDPB_LEGITIMATE_INTEREST_01_2024', name: 'EDPB GL Berecht. Interesse 2024', type: 'eu_guideline' },
  { code: 'EDPB_RTBF_05_2019', name: 'EDPB GL Recht auf Vergessenwerden', type: 'eu_guideline' },
  { code: 'EDPB_RRO_09_2020', name: 'EDPB GL Relevant & Reasoned Objection', type: 'eu_guideline' },
  { code: 'EDPB_SOCIAL_MEDIA_08_2020', name: 'EDPB GL Social Media Targeting', type: 'eu_guideline' },
  { code: 'EDPB_TRANSFERS_01_2020', name: 'EDPB GL Uebermittlungen Art. 49', type: 'eu_guideline' },
  { code: 'EDPB_TRANSFERS_07_2020', name: 'EDPB GL Drittlandtransfers', type: 'eu_guideline' },
  { code: 'EDPB_VIDEO_03_2019', name: 'EDPB GL Videoueberwachung', type: 'eu_guideline' },
  { code: 'EDPB_VVA_02_2021', name: 'EDPB GL Virtuelle Sprachassistenten', type: 'eu_guideline' },

  // EDPS guidance
  { code: 'EDPS_DIGITAL_ETHICS_2018', name: 'EDPS Digitale Ethik', type: 'eu_guideline' },
  { code: 'EDPS_GENAI_ORIENTATIONS_2024', name: 'EDPS GenAI Orientierungen', type: 'eu_guideline' },

  // WP29 endorsed documents
  { code: 'WP242_PORTABILITY', name: 'WP242 Datenportabilitaet', type: 'wp29_endorsed' },
  { code: 'WP243_DPO', name: 'WP243 Datenschutzbeauftragter', type: 'wp29_endorsed' },
  { code: 'WP244_PROFILING', name: 'WP244 Profiling', type: 'wp29_endorsed' },
  { code: 'WP248_DPIA', name: 'WP248 DSFA', type: 'wp29_endorsed' },
  { code: 'WP250_BREACH', name: 'WP250 Datenpannen', type: 'wp29_endorsed' },
  { code: 'WP259_CONSENT', name: 'WP259 Einwilligung', type: 'wp29_endorsed' },
  { code: 'WP260_TRANSPARENCY', name: 'WP260 Transparenz', type: 'wp29_endorsed' },

  // DSFA "Muss-Listen" (mandatory DPIA lists)
  { code: 'DSFA_BFDI_BUND', name: 'DSFA BfDI Bund', type: 'dsfa_mussliste' },
  { code: 'DSFA_DSK_GEMEINSAM', name: 'DSFA DSK Gemeinsam', type: 'dsfa_mussliste' },
  { code: 'DSFA_BW', name: 'DSFA Baden-Wuerttemberg', type: 'dsfa_mussliste' },
  { code: 'DSFA_BY', name: 'DSFA Bayern', type: 'dsfa_mussliste' },
  { code: 'DSFA_BE_OE', name: 'DSFA Berlin oeffentlich', type: 'dsfa_mussliste' },
  { code: 'DSFA_BE_NOE', name: 'DSFA Berlin nicht-oeffentlich', type: 'dsfa_mussliste' },
  { code: 'DSFA_BB_OE', name: 'DSFA Brandenburg oeffentlich', type: 'dsfa_mussliste' },
  { code: 'DSFA_BB_NOE', name: 'DSFA Brandenburg nicht-oeffentlich', type: 'dsfa_mussliste' },
  { code: 'DSFA_HB', name: 'DSFA Bremen', type: 'dsfa_mussliste' },
  { code: 'DSFA_HH_OE', name: 'DSFA Hamburg oeffentlich', type: 'dsfa_mussliste' },
  { code: 'DSFA_HH_NOE', name: 'DSFA Hamburg nicht-oeffentlich', type: 'dsfa_mussliste' },
  { code: 'DSFA_MV', name: 'DSFA Mecklenburg-Vorpommern', type: 'dsfa_mussliste' },
  { code: 'DSFA_NI', name: 'DSFA Niedersachsen', type: 'dsfa_mussliste' },
  { code: 'DSFA_RP', name: 'DSFA Rheinland-Pfalz', type: 'dsfa_mussliste' },
  { code: 'DSFA_SL', name: 'DSFA Saarland', type: 'dsfa_mussliste' },
  { code: 'DSFA_SN', name: 'DSFA Sachsen', type: 'dsfa_mussliste' },
  { code: 'DSFA_ST_OE', name: 'DSFA Sachsen-Anhalt oeffentlich', type: 'dsfa_mussliste' },
  { code: 'DSFA_ST_NOE', name: 'DSFA Sachsen-Anhalt nicht-oeffentlich', type: 'dsfa_mussliste' },
  { code: 'DSFA_SH', name: 'DSFA Schleswig-Holstein', type: 'dsfa_mussliste' },
  { code: 'DSFA_TH', name: 'DSFA Thueringen', type: 'dsfa_mussliste' },

  // International standards
  { code: 'NIST_SSDF', name: 'NIST SSDF', type: 'international_standard' },
  { code: 'NIST_CSF_2', name: 'NIST CSF 2.0', type: 'international_standard' },
  { code: 'OECD_AI_PRINCIPLES', name: 'OECD AI Principles', type: 'international_standard' },
  // NOTE(review): code says ENISA but the label says CISA — confirm which is intended.
  { code: 'ENISA_SECURE_BY_DESIGN', name: 'CISA Secure by Design', type: 'international_standard' },
  { code: 'ENISA_SUPPLY_CHAIN', name: 'ENISA Supply Chain', type: 'international_standard' },
  { code: 'ENISA_THREAT_LANDSCAPE', name: 'ENISA Threat Landscape', type: 'international_standard' },
  { code: 'ENISA_ICS_SCADA', name: 'ENISA ICS/SCADA', type: 'international_standard' },
  { code: 'ENISA_CYBERSECURITY_2024', name: 'ENISA Cybersecurity 2024', type: 'international_standard' },
]
|
||||
@@ -1430,7 +1430,6 @@ export default function TestQualityPage() {
|
||||
databases: ['Qdrant', 'PostgreSQL'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'Provider-Vergleich' },
|
||||
{ name: 'GPU Infrastruktur', href: '/ai/gpu', description: 'GPU-Ressourcen verwalten' },
|
||||
{ name: 'RAG Management', href: '/ai/rag', description: 'Training Data & RAG Pipelines' },
|
||||
]}
|
||||
|
||||
@@ -141,7 +141,6 @@ export default function VoiceMatrixPage() {
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'Matrix & Jitsi', href: '/communication/matrix', description: 'Kommunikation Monitoring' },
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider vergleichen' },
|
||||
{ name: 'GPU Infrastruktur', href: '/infrastructure/gpu', description: 'GPU fuer Voice-Service' },
|
||||
]}
|
||||
collapsible={true}
|
||||
|
||||
@@ -24,7 +24,6 @@ export default function DevelopmentPage() {
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'GPU Infrastruktur', href: '/infrastructure/gpu', description: 'GPU fuer Voice/Game' },
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'LLM fuer Voice/Game' },
|
||||
]}
|
||||
collapsible={true}
|
||||
defaultCollapsed={false}
|
||||
|
||||
@@ -149,7 +149,6 @@ const ADMIN_SCREENS: ScreenDefinition[] = [
|
||||
{ id: 'admin-obligations', name: 'Pflichten', description: 'NIS2, DSGVO, AI Act', category: 'sdk', icon: '⚡', url: '/sdk/obligations' },
|
||||
|
||||
// === KI & AUTOMATISIERUNG (Teal #14b8a6) ===
|
||||
{ id: 'admin-llm-compare', name: 'LLM Vergleich', description: 'KI-Provider Vergleich', category: 'ai', icon: '🤖', url: '/ai/llm-compare' },
|
||||
{ id: 'admin-rag', name: 'Daten & RAG', description: 'Training Data & RAG', category: 'ai', icon: '🗄️', url: '/ai/rag' },
|
||||
{ id: 'admin-ocr-labeling', name: 'OCR-Labeling', description: 'Handschrift-Training', category: 'ai', icon: '✍️', url: '/ai/ocr-labeling' },
|
||||
{ id: 'admin-magic-help', name: 'Magic Help', description: 'TrOCR Handschrift-OCR', category: 'ai', icon: '🪄', url: '/ai/magic-help' },
|
||||
@@ -196,7 +195,6 @@ const ADMIN_CONNECTIONS: ConnectionDef[] = [
|
||||
{ source: 'admin-dashboard', target: 'admin-backlog', label: 'Go-Live' },
|
||||
{ source: 'admin-dashboard', target: 'admin-compliance-hub', label: 'Compliance' },
|
||||
{ source: 'admin-onboarding', target: 'admin-consent' },
|
||||
{ source: 'admin-onboarding', target: 'admin-llm-compare' },
|
||||
{ source: 'admin-rbac', target: 'admin-consent' },
|
||||
|
||||
// === DSGVO FLOW ===
|
||||
@@ -224,7 +222,6 @@ const ADMIN_CONNECTIONS: ConnectionDef[] = [
|
||||
{ source: 'admin-dsms', target: 'admin-compliance-workflow' },
|
||||
|
||||
// === KI & AUTOMATISIERUNG FLOW ===
|
||||
{ source: 'admin-llm-compare', target: 'admin-rag', label: 'Daten' },
|
||||
{ source: 'admin-rag', target: 'admin-quality' },
|
||||
{ source: 'admin-rag', target: 'admin-agents' },
|
||||
{ source: 'admin-ocr-labeling', target: 'admin-magic-help', label: 'Training' },
|
||||
|
||||
@@ -1,665 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect } from 'react'
|
||||
import {
|
||||
GitBranch,
|
||||
Terminal,
|
||||
Server,
|
||||
Database,
|
||||
CheckCircle2,
|
||||
ArrowRight,
|
||||
Laptop,
|
||||
HardDrive,
|
||||
RefreshCw,
|
||||
Clock,
|
||||
Shield,
|
||||
Users,
|
||||
FileCode,
|
||||
Play,
|
||||
Eye,
|
||||
Download,
|
||||
AlertTriangle,
|
||||
Info,
|
||||
Container
|
||||
} from 'lucide-react'
|
||||
|
||||
interface WorkflowStep {
|
||||
id: number
|
||||
title: string
|
||||
description: string
|
||||
command?: string
|
||||
icon: React.ReactNode
|
||||
location: 'macbook' | 'macmini'
|
||||
}
|
||||
|
||||
interface BackupInfo {
|
||||
lastRun: string | null
|
||||
nextRun: string
|
||||
status: 'ok' | 'warning' | 'error'
|
||||
}
|
||||
|
||||
export default function WorkflowPage() {
|
||||
const [activeStep, setActiveStep] = useState<number>(1)
|
||||
const [backupInfo, setBackupInfo] = useState<BackupInfo>({
|
||||
lastRun: null,
|
||||
nextRun: '02:00 Uhr',
|
||||
status: 'ok'
|
||||
})
|
||||
|
||||
const workflowSteps: WorkflowStep[] = [
|
||||
{
|
||||
id: 1,
|
||||
title: 'Code bearbeiten',
|
||||
description: 'Arbeite mit Claude Code im Terminal. Beschreibe was du brauchst und Claude schreibt den Code.',
|
||||
command: 'claude',
|
||||
icon: <Terminal className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
},
|
||||
{
|
||||
id: 2,
|
||||
title: 'Änderungen stagen',
|
||||
description: 'Füge die geänderten Dateien zum nächsten Commit hinzu.',
|
||||
command: 'git add <dateien>',
|
||||
icon: <FileCode className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
},
|
||||
{
|
||||
id: 3,
|
||||
title: 'Commit erstellen',
|
||||
description: 'Erstelle einen Commit mit einer aussagekräftigen Nachricht.',
|
||||
command: 'git commit -m "feat: neue Funktion"',
|
||||
icon: <GitBranch className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
},
|
||||
{
|
||||
id: 4,
|
||||
title: 'Push zum Server',
|
||||
description: 'Sende die Änderungen an den Mac Mini. Dies startet automatisch die CI/CD Pipeline.',
|
||||
command: 'git push origin main',
|
||||
icon: <ArrowRight className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
},
|
||||
{
|
||||
id: 5,
|
||||
title: 'CI/CD Pipeline',
|
||||
description: 'Woodpecker führt automatisch Tests aus und baut die Container.',
|
||||
command: '(automatisch)',
|
||||
icon: <RefreshCw className="h-6 w-6" />,
|
||||
location: 'macmini'
|
||||
},
|
||||
{
|
||||
id: 6,
|
||||
title: 'Integration Tests',
|
||||
description: 'Docker Compose Test-Umgebung mit Backend, DB und Consent-Service fuer vollstaendige E2E-Tests.',
|
||||
command: 'docker compose -f docker-compose.test.yml up -d',
|
||||
icon: <Container className="h-6 w-6" />,
|
||||
location: 'macmini'
|
||||
},
|
||||
{
|
||||
id: 7,
|
||||
title: 'Frontend testen',
|
||||
description: 'Teste die Änderungen im Browser auf dem Mac Mini.',
|
||||
command: 'http://macmini:3000',
|
||||
icon: <Eye className="h-6 w-6" />,
|
||||
location: 'macbook'
|
||||
}
|
||||
]
|
||||
|
||||
const services = [
|
||||
{ name: 'Website', url: 'http://macmini:3000', port: 3000, status: 'running' },
|
||||
{ name: 'Admin v2', url: 'http://macmini:3002', port: 3002, status: 'running' },
|
||||
{ name: 'Studio v2', url: 'http://macmini:3001', port: 3001, status: 'running' },
|
||||
{ name: 'Backend', url: 'http://macmini:8000', port: 8000, status: 'running' },
|
||||
{ name: 'Gitea', url: 'http://macmini:3003', port: 3003, status: 'running' },
|
||||
{ name: 'Klausur-Service', url: 'http://macmini:8086', port: 8086, status: 'running' },
|
||||
]
|
||||
|
||||
const commitTypes = [
|
||||
{ type: 'feat:', description: 'Neue Funktion', example: 'feat: add user login' },
|
||||
{ type: 'fix:', description: 'Bugfix', example: 'fix: resolve login timeout' },
|
||||
{ type: 'docs:', description: 'Dokumentation', example: 'docs: update API docs' },
|
||||
{ type: 'style:', description: 'Formatierung', example: 'style: fix indentation' },
|
||||
{ type: 'refactor:', description: 'Code-Umbau', example: 'refactor: extract helper' },
|
||||
{ type: 'test:', description: 'Tests', example: 'test: add unit tests' },
|
||||
{ type: 'chore:', description: 'Wartung', example: 'chore: update deps' },
|
||||
]
|
||||
|
||||
return (
|
||||
<div className="space-y-8">
|
||||
{/* Header */}
|
||||
<div className="bg-gradient-to-r from-indigo-600 to-purple-600 rounded-2xl p-8 text-white">
|
||||
<h1 className="text-3xl font-bold mb-2">Entwicklungs-Workflow</h1>
|
||||
<p className="text-indigo-100">
|
||||
Wie wir bei BreakPilot entwickeln - von der Idee bis zum Deployment
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Architecture Overview */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Server className="h-5 w-5 text-indigo-600" />
|
||||
Systemarchitektur
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* MacBook */}
|
||||
<div className="bg-slate-50 rounded-xl p-5 border-2 border-slate-200">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="p-2 bg-blue-100 rounded-lg">
|
||||
<Laptop className="h-6 w-6 text-blue-600" />
|
||||
</div>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">MacBook (Entwicklung)</h3>
|
||||
<p className="text-sm text-slate-500">Dein Arbeitsplatz</p>
|
||||
</div>
|
||||
</div>
|
||||
<ul className="space-y-2 text-sm">
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Terminal + Claude Code</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Lokales Git Repository</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Browser für Frontend-Tests</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<AlertTriangle className="h-4 w-4 text-amber-500" />
|
||||
<span>Backup manuell (MacBook nachts aus)</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{/* Mac Mini */}
|
||||
<div className="bg-slate-50 rounded-xl p-5 border-2 border-indigo-200">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="p-2 bg-indigo-100 rounded-lg">
|
||||
<HardDrive className="h-6 w-6 text-indigo-600" />
|
||||
</div>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">Mac Mini (Server)</h3>
|
||||
<p className="text-sm text-slate-500">192.168.178.100</p>
|
||||
</div>
|
||||
</div>
|
||||
<ul className="space-y-2 text-sm">
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Gitea (Git Server)</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Woodpecker (CI/CD)</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Docker Container (alle Services)</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>PostgreSQL Datenbank</span>
|
||||
</li>
|
||||
<li className="flex items-center gap-2">
|
||||
<CheckCircle2 className="h-4 w-4 text-green-500" />
|
||||
<span>Automatisches Backup (02:00 Uhr lokal)</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Workflow Steps */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-6 flex items-center gap-2">
|
||||
<Play className="h-5 w-5 text-indigo-600" />
|
||||
Entwicklungs-Schritte
|
||||
</h2>
|
||||
|
||||
<div className="space-y-4">
|
||||
{workflowSteps.map((step, index) => (
|
||||
<div
|
||||
key={step.id}
|
||||
className={`relative flex items-start gap-4 p-4 rounded-xl transition-all cursor-pointer ${
|
||||
activeStep === step.id
|
||||
? 'bg-indigo-50 border-2 border-indigo-300'
|
||||
: 'bg-slate-50 border-2 border-transparent hover:border-slate-200'
|
||||
}`}
|
||||
onClick={() => setActiveStep(step.id)}
|
||||
>
|
||||
{/* Step Number */}
|
||||
<div className={`flex-shrink-0 w-10 h-10 rounded-full flex items-center justify-center font-bold ${
|
||||
activeStep === step.id
|
||||
? 'bg-indigo-600 text-white'
|
||||
: 'bg-slate-200 text-slate-600'
|
||||
}`}>
|
||||
{step.id}
|
||||
</div>
|
||||
|
||||
{/* Content */}
|
||||
<div className="flex-grow">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<h3 className="font-semibold text-slate-900">{step.title}</h3>
|
||||
<span className={`text-xs px-2 py-0.5 rounded-full ${
|
||||
step.location === 'macbook'
|
||||
? 'bg-blue-100 text-blue-700'
|
||||
: 'bg-purple-100 text-purple-700'
|
||||
}`}>
|
||||
{step.location === 'macbook' ? 'MacBook' : 'Mac Mini'}
|
||||
</span>
|
||||
</div>
|
||||
<p className="text-sm text-slate-600 mb-2">{step.description}</p>
|
||||
{step.command && (
|
||||
<code className="text-xs bg-slate-800 text-green-400 px-3 py-1.5 rounded-lg font-mono">
|
||||
{step.command}
|
||||
</code>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Icon */}
|
||||
<div className={`flex-shrink-0 p-2 rounded-lg ${
|
||||
activeStep === step.id ? 'bg-indigo-100 text-indigo-600' : 'bg-slate-100 text-slate-400'
|
||||
}`}>
|
||||
{step.icon}
|
||||
</div>
|
||||
|
||||
{/* Connector Line */}
|
||||
{index < workflowSteps.length - 1 && (
|
||||
<div className="absolute left-9 top-14 w-0.5 h-8 bg-slate-200" />
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Services & URLs */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Eye className="h-5 w-5 text-indigo-600" />
|
||||
Services & URLs zum Testen
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
|
||||
{services.map((service) => (
|
||||
<a
|
||||
key={service.name}
|
||||
href={service.url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="flex items-center justify-between p-4 bg-slate-50 rounded-lg hover:bg-slate-100 transition-colors border border-slate-200"
|
||||
>
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">{service.name}</h3>
|
||||
<p className="text-sm text-slate-500">Port {service.port}</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="w-2 h-2 bg-green-500 rounded-full animate-pulse" />
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
</div>
|
||||
</a>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Commit Convention */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<GitBranch className="h-5 w-5 text-indigo-600" />
|
||||
Commit-Konventionen
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-3">
|
||||
{commitTypes.map((item) => (
|
||||
<div key={item.type} className="bg-slate-50 rounded-lg p-3 border border-slate-200">
|
||||
<code className="text-sm font-bold text-indigo-600">{item.type}</code>
|
||||
<p className="text-sm text-slate-600 mt-1">{item.description}</p>
|
||||
<p className="text-xs text-slate-400 mt-1 font-mono">{item.example}</p>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Backup Info */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Shield className="h-5 w-5 text-indigo-600" />
|
||||
Backup & Sicherheit
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-6">
|
||||
{/* Mac Mini - Automatisches lokales Backup */}
|
||||
<div className="bg-green-50 rounded-xl p-5 border border-green-200">
|
||||
<div className="flex items-center gap-3 mb-3">
|
||||
<Clock className="h-5 w-5 text-green-600" />
|
||||
<h3 className="font-semibold text-green-900">Mac Mini (Auto)</h3>
|
||||
</div>
|
||||
<ul className="space-y-2 text-sm text-green-800">
|
||||
<li>• Automatisch um 02:00 Uhr</li>
|
||||
<li>• PostgreSQL-Dump lokal</li>
|
||||
<li>• Git Repository gesichert</li>
|
||||
<li>• 7 Tage Aufbewahrung</li>
|
||||
</ul>
|
||||
<div className="mt-4 p-3 bg-green-100 rounded-lg">
|
||||
<code className="text-xs text-green-700 font-mono">
|
||||
~/Projekte/backup-logs/
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* MacBook - Manuelles Backup */}
|
||||
<div className="bg-amber-50 rounded-xl p-5 border border-amber-200">
|
||||
<div className="flex items-center gap-3 mb-3">
|
||||
<AlertTriangle className="h-5 w-5 text-amber-600" />
|
||||
<h3 className="font-semibold text-amber-900">MacBook (Manuell)</h3>
|
||||
</div>
|
||||
<ul className="space-y-2 text-sm text-amber-800">
|
||||
<li>• MacBook nachts aus (02:00)</li>
|
||||
<li>• Keine Auto-Synchronisation</li>
|
||||
<li>• Backup manuell anstoßen</li>
|
||||
</ul>
|
||||
<div className="mt-4 p-3 bg-amber-100 rounded-lg">
|
||||
<code className="text-xs text-amber-700 font-mono">
|
||||
rsync -avz macmini:~/Projekte/ ~/Projekte/
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Manuelles Backup starten */}
|
||||
<div className="bg-blue-50 rounded-xl p-5 border border-blue-200">
|
||||
<div className="flex items-center gap-3 mb-3">
|
||||
<Download className="h-5 w-5 text-blue-600" />
|
||||
<h3 className="font-semibold text-blue-900">Backup Script</h3>
|
||||
</div>
|
||||
<p className="text-sm text-blue-800 mb-3">
|
||||
Backup jederzeit manuell starten:
|
||||
</p>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-3 rounded-lg font-mono">
|
||||
~/Projekte/breakpilot-pwa/scripts/daily-backup.sh
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Quick Commands */}
|
||||
<div className="bg-slate-800 rounded-xl p-6 text-white">
|
||||
<h2 className="text-xl font-semibold mb-4 flex items-center gap-2">
|
||||
<Terminal className="h-5 w-5 text-green-400" />
|
||||
Wichtige Befehle
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4 font-mono text-sm">
|
||||
<div className="bg-slate-900 rounded-lg p-4">
|
||||
<p className="text-slate-400 mb-2"># CI/CD Logs ansehen</p>
|
||||
<code className="text-green-400">ssh macmini "docker logs breakpilot-pwa-backend --tail 50"</code>
|
||||
</div>
|
||||
<div className="bg-slate-900 rounded-lg p-4">
|
||||
<p className="text-slate-400 mb-2"># Container neu starten</p>
|
||||
<code className="text-green-400">ssh macmini "docker compose restart backend"</code>
|
||||
</div>
|
||||
<div className="bg-slate-900 rounded-lg p-4">
|
||||
<p className="text-slate-400 mb-2"># Alle Container Status</p>
|
||||
<code className="text-green-400">ssh macmini "docker ps"</code>
|
||||
</div>
|
||||
<div className="bg-slate-900 rounded-lg p-4">
|
||||
<p className="text-slate-400 mb-2"># Pipeline Status (Gitea)</p>
|
||||
<code className="text-green-400">open http://macmini:3003</code>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Team Workflow with Feature Branches */}
|
||||
<div className="bg-indigo-50 rounded-xl border border-indigo-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-indigo-900 mb-4 flex items-center gap-2">
|
||||
<GitBranch className="h-5 w-5 text-indigo-600" />
|
||||
Team-Workflow (3+ Entwickler)
|
||||
</h2>
|
||||
|
||||
<div className="bg-white rounded-xl p-5 mb-4">
|
||||
<h3 className="font-semibold text-slate-900 mb-3">Feature Branch Workflow</h3>
|
||||
<div className="flex flex-wrap items-center gap-2 text-sm">
|
||||
<code className="bg-slate-100 px-2 py-1 rounded">main</code>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<code className="bg-blue-100 text-blue-700 px-2 py-1 rounded">feature/neue-funktion</code>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<span className="text-slate-600">Entwicklung</span>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<span className="bg-purple-100 text-purple-700 px-2 py-1 rounded">Pull Request</span>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<span className="bg-green-100 text-green-700 px-2 py-1 rounded">Code Review</span>
|
||||
<ArrowRight className="h-4 w-4 text-slate-400" />
|
||||
<code className="bg-slate-100 px-2 py-1 rounded">main</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
<div className="bg-white rounded-lg p-4 border border-indigo-100">
|
||||
<h4 className="font-medium text-slate-900 mb-2">1. Feature Branch erstellen</h4>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
|
||||
git checkout -b feature/mein-feature
|
||||
</code>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg p-4 border border-indigo-100">
|
||||
<h4 className="font-medium text-slate-900 mb-2">2. Änderungen committen</h4>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
|
||||
git commit -m "feat: beschreibung"
|
||||
</code>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg p-4 border border-indigo-100">
|
||||
<h4 className="font-medium text-slate-900 mb-2">3. Branch pushen</h4>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
|
||||
git push -u origin feature/mein-feature
|
||||
</code>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg p-4 border border-indigo-100">
|
||||
<h4 className="font-medium text-slate-900 mb-2">4. Pull Request in Gitea</h4>
|
||||
<code className="block text-xs bg-slate-800 text-green-400 p-2 rounded font-mono">
|
||||
http://macmini:3003 → Pull Request
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="mt-4 p-4 bg-indigo-100 rounded-lg">
|
||||
<h4 className="font-medium text-indigo-900 mb-2">Branch-Namenskonvention</h4>
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-2 text-sm">
|
||||
<div><code className="text-indigo-700">feature/</code> Neue Funktion</div>
|
||||
<div><code className="text-indigo-700">fix/</code> Bugfix</div>
|
||||
<div><code className="text-indigo-700">hotfix/</code> Dringender Fix</div>
|
||||
<div><code className="text-indigo-700">refactor/</code> Code-Umbau</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Team Rules */}
|
||||
<div className="bg-amber-50 rounded-xl border border-amber-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-amber-900 mb-4 flex items-center gap-2">
|
||||
<Users className="h-5 w-5 text-amber-600" />
|
||||
Team-Regeln
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Feature Branches nutzen</h3>
|
||||
<p className="text-sm text-slate-600">Nie direkt auf main pushen - immer über Pull Request</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Code Review erforderlich</h3>
|
||||
<p className="text-sm text-slate-600">Mindestens 1 Approval vor dem Merge</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Tests müssen grün sein</h3>
|
||||
<p className="text-sm text-slate-600">CI/CD Pipeline muss erfolgreich durchlaufen</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Aussagekräftige Commits</h3>
|
||||
<p className="text-sm text-slate-600">Nutze Conventional Commits (feat:, fix:, etc.)</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<CheckCircle2 className="h-5 w-5 text-green-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Branch aktuell halten</h3>
|
||||
<p className="text-sm text-slate-600">Regelmäßig main in deinen Branch mergen</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-start gap-3">
|
||||
<AlertTriangle className="h-5 w-5 text-amber-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h3 className="font-medium text-slate-900">Nie Force-Push auf main</h3>
|
||||
<p className="text-sm text-slate-600">Geschichte von main nie überschreiben</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* CI/CD Infrastruktur - Automatisierte OAuth Integration */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Shield className="h-5 w-5 text-indigo-600" />
|
||||
CI/CD Infrastruktur (Automatisiert)
|
||||
</h2>
|
||||
|
||||
<div className="bg-blue-50 rounded-xl p-4 mb-6 border border-blue-200">
|
||||
<div className="flex items-start gap-3">
|
||||
<Info className="h-5 w-5 text-blue-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<h4 className="font-medium text-blue-900">Warum automatisiert?</h4>
|
||||
<p className="text-sm text-blue-800 mt-1">
|
||||
Die OAuth-Integration zwischen Woodpecker und Gitea ist vollautomatisiert.
|
||||
Dies ist eine DevSecOps Best Practice: Credentials werden in HashiCorp Vault gespeichert
|
||||
und können bei Bedarf automatisch regeneriert werden.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* Architektur */}
|
||||
<div className="bg-slate-50 rounded-xl p-5 border border-slate-200">
|
||||
<h3 className="font-semibold text-slate-900 mb-3">Architektur</h3>
|
||||
<div className="space-y-3 text-sm">
|
||||
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
|
||||
<div className="w-3 h-3 bg-green-500 rounded-full" />
|
||||
<span className="font-medium">Gitea</span>
|
||||
<span className="text-slate-500">Port 3003</span>
|
||||
<span className="text-xs text-slate-400 ml-auto">Git Server</span>
|
||||
</div>
|
||||
<div className="flex items-center justify-center">
|
||||
<ArrowRight className="h-4 w-4 text-slate-400 rotate-90" />
|
||||
<span className="text-xs text-slate-500 ml-2">OAuth 2.0</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
|
||||
<div className="w-3 h-3 bg-blue-500 rounded-full" />
|
||||
<span className="font-medium">Woodpecker</span>
|
||||
<span className="text-slate-500">Port 8090</span>
|
||||
<span className="text-xs text-slate-400 ml-auto">CI/CD Server</span>
|
||||
</div>
|
||||
<div className="flex items-center justify-center">
|
||||
<ArrowRight className="h-4 w-4 text-slate-400 rotate-90" />
|
||||
<span className="text-xs text-slate-500 ml-2">Credentials</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 p-2 bg-white rounded-lg border">
|
||||
<div className="w-3 h-3 bg-purple-500 rounded-full" />
|
||||
<span className="font-medium">Vault</span>
|
||||
<span className="text-slate-500">Port 8200</span>
|
||||
<span className="text-xs text-slate-400 ml-auto">Secrets Manager</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Credentials Speicherort */}
|
||||
<div className="bg-slate-50 rounded-xl p-5 border border-slate-200">
|
||||
<h3 className="font-semibold text-slate-900 mb-3">Credentials Speicherorte</h3>
|
||||
<div className="space-y-3 text-sm">
|
||||
<div className="p-3 bg-white rounded-lg border">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<Database className="h-4 w-4 text-purple-500" />
|
||||
<span className="font-medium">HashiCorp Vault</span>
|
||||
</div>
|
||||
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
|
||||
secret/cicd/woodpecker
|
||||
</code>
|
||||
<p className="text-xs text-slate-500 mt-1">Client ID + Secret (Quelle der Wahrheit)</p>
|
||||
</div>
|
||||
<div className="p-3 bg-white rounded-lg border">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<FileCode className="h-4 w-4 text-blue-500" />
|
||||
<span className="font-medium">.env Datei</span>
|
||||
</div>
|
||||
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
|
||||
WOODPECKER_GITEA_CLIENT/SECRET
|
||||
</code>
|
||||
<p className="text-xs text-slate-500 mt-1">Für Docker Compose (aus Vault geladen)</p>
|
||||
</div>
|
||||
<div className="p-3 bg-white rounded-lg border">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<Database className="h-4 w-4 text-green-500" />
|
||||
<span className="font-medium">Gitea PostgreSQL</span>
|
||||
</div>
|
||||
<code className="text-xs bg-slate-100 px-2 py-1 rounded">
|
||||
oauth2_application
|
||||
</code>
|
||||
<p className="text-xs text-slate-500 mt-1">OAuth App Registration (gehashtes Secret)</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Troubleshooting */}
|
||||
<div className="mt-6 bg-amber-50 rounded-xl p-5 border border-amber-200">
|
||||
<h3 className="font-semibold text-amber-900 mb-3 flex items-center gap-2">
|
||||
<AlertTriangle className="h-5 w-5 text-amber-600" />
|
||||
Troubleshooting: OAuth Fehler beheben
|
||||
</h3>
|
||||
<p className="text-sm text-amber-800 mb-3">
|
||||
Falls der Fehler "Client ID not registered" oder "user does not exist" auftritt:
|
||||
</p>
|
||||
<div className="bg-slate-800 rounded-lg p-4 font-mono text-sm">
|
||||
<p className="text-slate-400"># Credentials automatisch regenerieren</p>
|
||||
<p className="text-green-400">./scripts/sync-woodpecker-credentials.sh --regenerate</p>
|
||||
<p className="text-slate-400 mt-2"># Oder manuell: Vault → Gitea → .env → Restart</p>
|
||||
<p className="text-green-400">rsync .env macmini:~/Projekte/breakpilot-pwa/</p>
|
||||
<p className="text-green-400">ssh macmini "cd ~/Projekte/breakpilot-pwa && docker compose up -d --force-recreate woodpecker-server"</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Team Members Info */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h2 className="text-xl font-semibold text-slate-900 mb-4 flex items-center gap-2">
|
||||
<Users className="h-5 w-5 text-indigo-600" />
|
||||
Team-Kommunikation
|
||||
</h2>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
|
||||
<div className="bg-slate-50 rounded-lg p-4 text-center">
|
||||
<div className="text-3xl mb-2">💬</div>
|
||||
<h3 className="font-medium text-slate-900">Pull Request Kommentare</h3>
|
||||
<p className="text-sm text-slate-600 mt-1">Code-Diskussionen im PR</p>
|
||||
</div>
|
||||
<div className="bg-slate-50 rounded-lg p-4 text-center">
|
||||
<div className="text-3xl mb-2">📋</div>
|
||||
<h3 className="font-medium text-slate-900">Issues in Gitea</h3>
|
||||
<p className="text-sm text-slate-600 mt-1">Bugs & Features tracken</p>
|
||||
</div>
|
||||
<div className="bg-slate-50 rounded-lg p-4 text-center">
|
||||
<div className="text-3xl mb-2">🔔</div>
|
||||
<h3 className="font-medium text-slate-900">CI/CD Notifications</h3>
|
||||
<p className="text-sm text-slate-600 mt-1">Pipeline-Status per Mail</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -177,7 +177,6 @@ export default function GPUInfrastructurePage() {
|
||||
databases: ['PostgreSQL (Logs)'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
|
||||
{ name: 'Security', href: '/infrastructure/security', description: 'DevSecOps Dashboard' },
|
||||
{ name: 'Builds', href: '/infrastructure/builds', description: 'CI/CD Pipeline' },
|
||||
]}
|
||||
|
||||
@@ -51,13 +51,9 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
|
||||
// ===== DATABASES =====
|
||||
{ type: 'service', name: 'PostgreSQL', version: '16-alpine', category: 'database', port: '5432', description: 'Hauptdatenbank', license: 'PostgreSQL', sourceUrl: 'https://github.com/postgres/postgres' },
|
||||
{ type: 'service', name: 'Synapse PostgreSQL', version: '16-alpine', category: 'database', port: '-', description: 'Matrix Datenbank', license: 'PostgreSQL', sourceUrl: 'https://github.com/postgres/postgres' },
|
||||
{ type: 'service', name: 'ERPNext MariaDB', version: '10.6', category: 'database', port: '-', description: 'ERPNext Datenbank', license: 'GPL-2.0', sourceUrl: 'https://github.com/MariaDB/server' },
|
||||
{ type: 'service', name: 'MongoDB', version: '7.0', category: 'database', port: '27017', description: 'LibreChat Datenbank', license: 'SSPL-1.0', sourceUrl: 'https://github.com/mongodb/mongo' },
|
||||
|
||||
// ===== CACHE & QUEUE =====
|
||||
{ type: 'service', name: 'Valkey', version: '8-alpine', category: 'cache', port: '6379', description: 'In-Memory Cache & Sessions (Redis OSS Fork)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
|
||||
{ type: 'service', name: 'ERPNext Valkey Queue', version: 'alpine', category: 'cache', port: '-', description: 'Job Queue', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
|
||||
{ type: 'service', name: 'ERPNext Valkey Cache', version: 'alpine', category: 'cache', port: '-', description: 'Cache Layer', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/valkey-io/valkey' },
|
||||
|
||||
// ===== SEARCH ENGINES =====
|
||||
{ type: 'service', name: 'Qdrant', version: '1.7.4', category: 'search', port: '6333', description: 'Vector Database (RAG/Embeddings)', license: 'Apache-2.0', sourceUrl: 'https://github.com/qdrant/qdrant' },
|
||||
@@ -66,8 +62,6 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
|
||||
|
||||
// ===== OBJECT STORAGE =====
|
||||
{ type: 'service', name: 'MinIO', version: 'latest', category: 'storage', port: '9000/9001', description: 'S3-kompatibel Object Storage', license: 'AGPL-3.0', sourceUrl: 'https://github.com/minio/minio' },
|
||||
{ type: 'service', name: 'IPFS (Kubo)', version: '0.24', category: 'storage', port: '5001', description: 'Dezentrales Speichersystem', license: 'MIT/Apache-2.0', sourceUrl: 'https://github.com/ipfs/kubo' },
|
||||
{ type: 'service', name: 'DSMS Gateway', version: '1.0', category: 'storage', port: '8082', description: 'IPFS REST API', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== SECURITY =====
|
||||
{ type: 'service', name: 'HashiCorp Vault', version: '1.15', category: 'security', port: '8200', description: 'Secrets Management', license: 'BUSL-1.1', sourceUrl: 'https://github.com/hashicorp/vault' },
|
||||
@@ -83,36 +77,19 @@ const INFRASTRUCTURE_COMPONENTS: Component[] = [
|
||||
{ type: 'service', name: 'Jibri', version: 'stable-9823', category: 'communication', port: '-', description: 'Recording & Streaming Service', license: 'Apache-2.0', sourceUrl: 'https://github.com/jitsi/jibri' },
|
||||
|
||||
// ===== APPLICATION SERVICES (Python) =====
|
||||
{ type: 'service', name: 'Python Backend (FastAPI)', version: '3.12', category: 'application', port: '8000', description: 'Haupt-Backend API, Studio & Alerts Agent', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Python Backend (FastAPI)', version: '3.12', category: 'application', port: '8000', description: 'Lehrer Backend API (Klausuren, E-Mail, Alerts)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Klausur Service', version: '1.0', category: 'application', port: '8086', description: 'Abitur-Klausurkorrektur (BYOEH)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Compliance Module', version: '2.0', category: 'application', port: '8000', description: 'GRC Framework (19 Regulations, 558 Requirements, AI)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Transcription Worker', version: '1.0', category: 'application', port: '-', description: 'Whisper + pyannote Transkription', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== APPLICATION SERVICES (Go) =====
|
||||
{ type: 'service', name: 'Go Consent Service', version: '1.21', category: 'application', port: '8081', description: 'DSGVO Consent Management', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Go School Service', version: '1.21', category: 'application', port: '8084', description: 'Klausuren, Noten, Zeugnisse', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Go Billing Service', version: '1.21', category: 'application', port: '8083', description: 'Stripe Billing Integration', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== APPLICATION SERVICES (Node.js) =====
|
||||
{ type: 'service', name: 'Next.js Admin Frontend', version: '15.1', category: 'application', port: '3000', description: 'Admin Dashboard (React)', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'H5P Content Service', version: 'latest', category: 'application', port: '8085', description: 'Interaktive Inhalte', license: 'MIT', sourceUrl: 'https://github.com/h5p/h5p-server' },
|
||||
{ type: 'service', name: 'Policy Vault (NestJS)', version: '1.0', category: 'application', port: '3001', description: 'Richtlinien-Verwaltung API', license: 'Proprietary', sourceUrl: '-' },
|
||||
{ type: 'service', name: 'Policy Vault (Angular)', version: '17', category: 'application', port: '4200', description: 'Richtlinien-Verwaltung UI', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== APPLICATION SERVICES (Vue) =====
|
||||
{ type: 'service', name: 'Creator Studio (Vue 3)', version: '3.4', category: 'application', port: '-', description: 'Content Creation UI', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== AI/LLM SERVICES =====
|
||||
{ type: 'service', name: 'LibreChat', version: 'latest', category: 'ai', port: '3080', description: 'Multi-LLM Chat Interface', license: 'MIT', sourceUrl: 'https://github.com/danny-avila/LibreChat' },
|
||||
{ type: 'service', name: 'RAGFlow', version: 'latest', category: 'ai', port: '9380', description: 'RAG Pipeline Service', license: 'Apache-2.0', sourceUrl: 'https://github.com/infiniflow/ragflow' },
|
||||
|
||||
// ===== ERP =====
|
||||
{ type: 'service', name: 'ERPNext', version: 'v15', category: 'erp', port: '8090', description: 'Open Source ERP System', license: 'GPL-3.0', sourceUrl: 'https://github.com/frappe/erpnext' },
|
||||
{ type: 'service', name: 'Next.js Admin Frontend', version: '15.1', category: 'application', port: '3002', description: 'Admin Lehrer Dashboard (React)', license: 'Proprietary', sourceUrl: '-' },
|
||||
|
||||
// ===== CI/CD & VERSION CONTROL =====
|
||||
{ type: 'service', name: 'Woodpecker CI', version: '2.x', category: 'cicd', port: '8082', description: 'Self-hosted CI/CD Pipeline (Drone Fork)', license: 'Apache-2.0', sourceUrl: 'https://github.com/woodpecker-ci/woodpecker' },
|
||||
{ type: 'service', name: 'Gitea', version: '1.21', category: 'cicd', port: '3003', description: 'Self-hosted Git Service', license: 'MIT', sourceUrl: 'https://github.com/go-gitea/gitea' },
|
||||
{ type: 'service', name: 'Dokploy', version: '0.26.7', category: 'cicd', port: '3000', description: 'Self-hosted PaaS (Vercel/Heroku Alternative)', license: 'Apache-2.0', sourceUrl: 'https://github.com/Dokploy/dokploy' },
|
||||
|
||||
// ===== DEVELOPMENT =====
|
||||
{ type: 'service', name: 'Mailpit', version: 'latest', category: 'development', port: '8025/1025', description: 'E-Mail Testing (SMTP Catch-All)', license: 'MIT', sourceUrl: 'https://github.com/axllent/mailpit' },
|
||||
@@ -184,10 +161,7 @@ const PYTHON_PACKAGES: Component[] = [
|
||||
{ type: 'library', name: 'structlog', version: '24.x', category: 'python', description: 'Structured Logging', license: 'Apache-2.0', sourceUrl: 'https://github.com/hynek/structlog' },
|
||||
{ type: 'library', name: 'feedparser', version: '6.x', category: 'python', description: 'RSS/Atom Feed Parser (Alerts Agent)', license: 'BSD-2-Clause', sourceUrl: 'https://github.com/kurtmckee/feedparser' },
|
||||
{ type: 'library', name: 'APScheduler', version: '3.x', category: 'python', description: 'AsyncIO Job Scheduler (Alerts Agent)', license: 'MIT', sourceUrl: 'https://github.com/agronholm/apscheduler' },
|
||||
{ type: 'library', name: 'beautifulsoup4', version: '4.x', category: 'python', description: 'HTML Parser (Email Parsing, Compliance Scraper)', license: 'MIT', sourceUrl: 'https://code.launchpad.net/beautifulsoup' },
|
||||
{ type: 'library', name: 'lxml', version: '5.x', category: 'python', description: 'XML/HTML Parser (EUR-Lex Scraping)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/lxml/lxml' },
|
||||
{ type: 'library', name: 'PyMuPDF', version: '1.24+', category: 'python', description: 'PDF Parser (BSI-TR Extraction)', license: 'AGPL-3.0', sourceUrl: 'https://github.com/pymupdf/PyMuPDF' },
|
||||
{ type: 'library', name: 'pdfplumber', version: '0.11+', category: 'python', description: 'PDF Table Extraction (Compliance Docs)', license: 'MIT', sourceUrl: 'https://github.com/jsvine/pdfplumber' },
|
||||
{ type: 'library', name: 'beautifulsoup4', version: '4.x', category: 'python', description: 'HTML Parser (Email Parsing)', license: 'MIT', sourceUrl: 'https://code.launchpad.net/beautifulsoup' },
|
||||
{ type: 'library', name: 'websockets', version: '14.x', category: 'python', description: 'WebSocket Support (Voice Streaming)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/python-websockets/websockets' },
|
||||
{ type: 'library', name: 'soundfile', version: '0.13+', category: 'python', description: 'Audio File Processing (Voice Service)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/bastibe/python-soundfile' },
|
||||
{ type: 'library', name: 'scipy', version: '1.14+', category: 'python', description: 'Signal Processing (Audio)', license: 'BSD-3-Clause', sourceUrl: 'https://github.com/scipy/scipy' },
|
||||
@@ -200,7 +174,8 @@ const GO_MODULES: Component[] = [
|
||||
{ type: 'library', name: 'gin-gonic/gin', version: '1.9+', category: 'go', description: 'Web Framework', license: 'MIT', sourceUrl: 'https://github.com/gin-gonic/gin' },
|
||||
{ type: 'library', name: 'gorm.io/gorm', version: '1.25+', category: 'go', description: 'ORM', license: 'MIT', sourceUrl: 'https://github.com/go-gorm/gorm' },
|
||||
{ type: 'library', name: 'golang-jwt/jwt', version: 'v5', category: 'go', description: 'JWT Library', license: 'MIT', sourceUrl: 'https://github.com/golang-jwt/jwt' },
|
||||
{ type: 'library', name: 'stripe/stripe-go', version: 'v76', category: 'go', description: 'Stripe SDK', license: 'MIT', sourceUrl: 'https://github.com/stripe/stripe-go' },
|
||||
{ type: 'library', name: 'opensearch-project/opensearch-go', version: '4.x', category: 'go', description: 'OpenSearch Client (edu-search-service)', license: 'Apache-2.0', sourceUrl: 'https://github.com/opensearch-project/opensearch-go' },
|
||||
{ type: 'library', name: 'lib/pq', version: '1.10+', category: 'go', description: 'PostgreSQL Driver (school-service)', license: 'MIT', sourceUrl: 'https://github.com/lib/pq' },
|
||||
{ type: 'library', name: 'spf13/viper', version: 'latest', category: 'go', description: 'Configuration', license: 'MIT', sourceUrl: 'https://github.com/spf13/viper' },
|
||||
{ type: 'library', name: 'uber-go/zap', version: 'latest', category: 'go', description: 'Structured Logging', license: 'MIT', sourceUrl: 'https://github.com/uber-go/zap' },
|
||||
{ type: 'library', name: 'swaggo/swag', version: 'latest', category: 'go', description: 'Swagger Docs', license: 'MIT', sourceUrl: 'https://github.com/swaggo/swag' },
|
||||
@@ -210,15 +185,10 @@ const GO_MODULES: Component[] = [
|
||||
const NODE_PACKAGES: Component[] = [
|
||||
{ type: 'library', name: 'Next.js', version: '15.1', category: 'nodejs', description: 'React Framework', license: 'MIT', sourceUrl: 'https://github.com/vercel/next.js' },
|
||||
{ type: 'library', name: 'React', version: '19', category: 'nodejs', description: 'UI Library', license: 'MIT', sourceUrl: 'https://github.com/facebook/react' },
|
||||
{ type: 'library', name: 'Vue.js', version: '3.4', category: 'nodejs', description: 'UI Framework (Creator Studio)', license: 'MIT', sourceUrl: 'https://github.com/vuejs/core' },
|
||||
{ type: 'library', name: 'Angular', version: '17', category: 'nodejs', description: 'UI Framework (Policy Vault)', license: 'MIT', sourceUrl: 'https://github.com/angular/angular' },
|
||||
{ type: 'library', name: 'NestJS', version: '10', category: 'nodejs', description: 'Node.js Framework', license: 'MIT', sourceUrl: 'https://github.com/nestjs/nest' },
|
||||
{ type: 'library', name: 'TypeScript', version: '5.x', category: 'nodejs', description: 'Type System', license: 'Apache-2.0', sourceUrl: 'https://github.com/microsoft/TypeScript' },
|
||||
{ type: 'library', name: 'Tailwind CSS', version: '3.4', category: 'nodejs', description: 'Utility CSS', license: 'MIT', sourceUrl: 'https://github.com/tailwindlabs/tailwindcss' },
|
||||
{ type: 'library', name: 'Prisma', version: '5.x', category: 'nodejs', description: 'ORM (Policy Vault)', license: 'Apache-2.0', sourceUrl: 'https://github.com/prisma/prisma' },
|
||||
{ type: 'library', name: 'Material Design Icons', version: 'latest', category: 'nodejs', description: 'Icon-System (Companion UI, Studio)', license: 'Apache-2.0', sourceUrl: 'https://github.com/google/material-design-icons' },
|
||||
{ type: 'library', name: 'Recharts', version: '2.12', category: 'nodejs', description: 'React Charts (Compliance Dashboard)', license: 'MIT', sourceUrl: 'https://github.com/recharts/recharts' },
|
||||
{ type: 'library', name: 'React Flow', version: '11.x', category: 'nodejs', description: 'Node-basierte Flow-Diagramme (Screen Flow)', license: 'MIT', sourceUrl: 'https://github.com/xyflow/xyflow' },
|
||||
{ type: 'library', name: 'Recharts', version: '2.12', category: 'nodejs', description: 'React Charts (Admin Dashboard)', license: 'MIT', sourceUrl: 'https://github.com/recharts/recharts' },
|
||||
{ type: 'library', name: 'Playwright', version: '1.50', category: 'nodejs', description: 'E2E Testing Framework (SDK Tests)', license: 'Apache-2.0', sourceUrl: 'https://github.com/microsoft/playwright' },
|
||||
{ type: 'library', name: 'Vitest', version: '4.x', category: 'nodejs', description: 'Unit Testing Framework', license: 'MIT', sourceUrl: 'https://github.com/vitest-dev/vitest' },
|
||||
{ type: 'library', name: 'jsPDF', version: '4.x', category: 'nodejs', description: 'PDF Generation (SDK Export)', license: 'MIT', sourceUrl: 'https://github.com/parallax/jsPDF' },
|
||||
@@ -357,9 +327,7 @@ export default function SBOMPage() {
|
||||
case 'communication': return 'bg-yellow-100 text-yellow-800'
|
||||
case 'storage': return 'bg-orange-100 text-orange-800'
|
||||
case 'search': return 'bg-pink-100 text-pink-800'
|
||||
case 'erp': return 'bg-indigo-100 text-indigo-800'
|
||||
case 'cache': return 'bg-cyan-100 text-cyan-800'
|
||||
case 'ai': return 'bg-violet-100 text-violet-800'
|
||||
case 'development': return 'bg-gray-100 text-gray-800'
|
||||
case 'cicd': return 'bg-orange-100 text-orange-800'
|
||||
case 'python': return 'bg-emerald-100 text-emerald-800'
|
||||
@@ -415,7 +383,7 @@ export default function SBOMPage() {
|
||||
<div>
|
||||
<PagePurpose
|
||||
title="SBOM"
|
||||
purpose="Software Bill of Materials - Alle Komponenten & Abhaengigkeiten der Breakpilot-Plattform. Wichtig fuer Supply-Chain-Security, Compliance-Audits und Lizenz-Pruefung."
|
||||
purpose="Software Bill of Materials - Alle Komponenten & Abhaengigkeiten der Breakpilot Lehrer-Plattform. Wichtig fuer Supply-Chain-Security, Compliance-Audits und Lizenz-Pruefung."
|
||||
audience={['DevOps', 'Compliance', 'Security', 'Auditoren']}
|
||||
gdprArticles={['Art. 32 (Sicherheit der Verarbeitung)']}
|
||||
architecture={{
|
||||
@@ -654,7 +622,7 @@ export default function SBOMPage() {
|
||||
const url = URL.createObjectURL(blob)
|
||||
const a = document.createElement('a')
|
||||
a.href = url
|
||||
a.download = `breakpilot-sbom-${new Date().toISOString().split('T')[0]}.json`
|
||||
a.download = `breakpilot-lehrer-sbom-${new Date().toISOString().split('T')[0]}.json`
|
||||
a.click()
|
||||
}}
|
||||
className="px-4 py-2 bg-orange-600 text-white rounded-lg hover:bg-orange-700 transition-colors flex items-center gap-2"
|
||||
|
||||
@@ -335,7 +335,6 @@ export default function RBACPage() {
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'Audit Trail', href: '/sdk/audit-report', description: 'LLM-Operationen protokollieren' },
|
||||
{ name: 'LLM Vergleich', href: '/ai/llm-compare', description: 'KI-Provider testen' },
|
||||
]}
|
||||
/>
|
||||
|
||||
|
||||
@@ -0,0 +1,163 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
|
||||
/**
|
||||
* Tests for Chunk-Browser logic:
|
||||
* - Collection dropdown has all 10 collections
|
||||
* - COLLECTION_TOTALS has expected keys
|
||||
* - Text search highlighting logic
|
||||
* - Pagination state management
|
||||
*/
|
||||
|
||||
// Replicate the COMPLIANCE_COLLECTIONS from the dropdown
|
||||
const COMPLIANCE_COLLECTIONS = [
|
||||
'bp_compliance_gesetze',
|
||||
'bp_compliance_ce',
|
||||
'bp_compliance_datenschutz',
|
||||
'bp_dsfa_corpus',
|
||||
'bp_compliance_recht',
|
||||
'bp_legal_templates',
|
||||
'bp_compliance_gdpr',
|
||||
'bp_compliance_schulrecht',
|
||||
'bp_dsfa_templates',
|
||||
'bp_dsfa_risks',
|
||||
] as const
|
||||
|
||||
// Replicate COLLECTION_TOTALS from page.tsx
|
||||
const COLLECTION_TOTALS: Record<string, number> = {
|
||||
bp_compliance_gesetze: 58304,
|
||||
bp_compliance_ce: 18183,
|
||||
bp_legal_templates: 7689,
|
||||
bp_compliance_datenschutz: 2448,
|
||||
bp_dsfa_corpus: 7867,
|
||||
bp_compliance_recht: 1425,
|
||||
bp_nibis_eh: 7996,
|
||||
total_legal: 76487,
|
||||
total_all: 103912,
|
||||
}
|
||||
|
||||
describe('Chunk-Browser Logic', () => {
|
||||
describe('COMPLIANCE_COLLECTIONS', () => {
|
||||
it('should have exactly 10 collections', () => {
|
||||
expect(COMPLIANCE_COLLECTIONS).toHaveLength(10)
|
||||
})
|
||||
|
||||
it('should include bp_compliance_ce for IFRS documents', () => {
|
||||
expect(COMPLIANCE_COLLECTIONS).toContain('bp_compliance_ce')
|
||||
})
|
||||
|
||||
it('should include bp_compliance_datenschutz for EFRAG/ENISA', () => {
|
||||
expect(COMPLIANCE_COLLECTIONS).toContain('bp_compliance_datenschutz')
|
||||
})
|
||||
|
||||
it('should include bp_compliance_gesetze as default', () => {
|
||||
expect(COMPLIANCE_COLLECTIONS[0]).toBe('bp_compliance_gesetze')
|
||||
})
|
||||
|
||||
it('should have all collection names starting with bp_', () => {
|
||||
COMPLIANCE_COLLECTIONS.forEach((col) => {
|
||||
expect(col).toMatch(/^bp_/)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('COLLECTION_TOTALS', () => {
|
||||
it('should have bp_compliance_ce key', () => {
|
||||
expect(COLLECTION_TOTALS).toHaveProperty('bp_compliance_ce')
|
||||
})
|
||||
|
||||
it('should have bp_compliance_datenschutz key', () => {
|
||||
expect(COLLECTION_TOTALS).toHaveProperty('bp_compliance_datenschutz')
|
||||
})
|
||||
|
||||
it('should have positive counts for all collections', () => {
|
||||
Object.values(COLLECTION_TOTALS).forEach((count) => {
|
||||
expect(count).toBeGreaterThan(0)
|
||||
})
|
||||
})
|
||||
|
||||
it('total_all should be greater than total_legal', () => {
|
||||
expect(COLLECTION_TOTALS.total_all).toBeGreaterThan(COLLECTION_TOTALS.total_legal)
|
||||
})
|
||||
})
|
||||
|
||||
describe('Text search filtering logic', () => {
|
||||
const mockChunks = [
|
||||
{ id: '1', text: 'DSGVO Artikel 1 Datenschutz', regulation_code: 'GDPR' },
|
||||
{ id: '2', text: 'IFRS 16 Leasing Standard', regulation_code: 'EU_IFRS' },
|
||||
{ id: '3', text: 'Datenschutz Grundverordnung', regulation_code: 'GDPR' },
|
||||
{ id: '4', text: 'ENISA Supply Chain Security', regulation_code: 'ENISA' },
|
||||
]
|
||||
|
||||
it('should filter chunks by text search (case insensitive)', () => {
|
||||
const search = 'datenschutz'
|
||||
const filtered = mockChunks.filter((c) =>
|
||||
c.text.toLowerCase().includes(search.toLowerCase())
|
||||
)
|
||||
expect(filtered).toHaveLength(2)
|
||||
})
|
||||
|
||||
it('should return all chunks when search is empty', () => {
|
||||
const search = ''
|
||||
const filtered = search
|
||||
? mockChunks.filter((c) => c.text.toLowerCase().includes(search.toLowerCase()))
|
||||
: mockChunks
|
||||
expect(filtered).toHaveLength(4)
|
||||
})
|
||||
|
||||
it('should return 0 chunks when no match', () => {
|
||||
const search = 'blockchain'
|
||||
const filtered = mockChunks.filter((c) =>
|
||||
c.text.toLowerCase().includes(search.toLowerCase())
|
||||
)
|
||||
expect(filtered).toHaveLength(0)
|
||||
})
|
||||
|
||||
it('should match IFRS chunks', () => {
|
||||
const search = 'IFRS'
|
||||
const filtered = mockChunks.filter((c) =>
|
||||
c.text.toLowerCase().includes(search.toLowerCase())
|
||||
)
|
||||
expect(filtered).toHaveLength(1)
|
||||
expect(filtered[0].regulation_code).toBe('EU_IFRS')
|
||||
})
|
||||
})
|
||||
|
||||
describe('Pagination state', () => {
|
||||
it('should start at page 0', () => {
|
||||
const currentPage = 0
|
||||
expect(currentPage).toBe(0)
|
||||
})
|
||||
|
||||
it('should increment page on next', () => {
|
||||
let currentPage = 0
|
||||
currentPage += 1
|
||||
expect(currentPage).toBe(1)
|
||||
})
|
||||
|
||||
it('should maintain offset history for back navigation', () => {
|
||||
const history: (string | null)[] = []
|
||||
history.push(null) // page 0 offset
|
||||
history.push('uuid-20') // page 1 offset
|
||||
history.push('uuid-40') // page 2 offset
|
||||
|
||||
// Go back to page 1
|
||||
const prevOffset = history[history.length - 2]
|
||||
expect(prevOffset).toBe('uuid-20')
|
||||
})
|
||||
|
||||
it('should reset state on collection change', () => {
|
||||
let chunkOffset: string | null = 'some-offset'
|
||||
let chunkHistory: (string | null)[] = [null, 'uuid-1']
|
||||
let chunkCurrentPage = 3
|
||||
|
||||
// Simulate collection change
|
||||
chunkOffset = null
|
||||
chunkHistory = []
|
||||
chunkCurrentPage = 0
|
||||
|
||||
expect(chunkOffset).toBeNull()
|
||||
expect(chunkHistory).toHaveLength(0)
|
||||
expect(chunkCurrentPage).toBe(0)
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,90 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
|
||||
/**
|
||||
* Tests for RAG page constants - REGULATIONS_IN_RAG, REGULATION_SOURCES, REGULATION_LICENSES
|
||||
*
|
||||
* These are defined inline in page.tsx, so we test the data structures
|
||||
* by replicating a subset of the expected values in this file.
|
||||
*/
|
||||
|
||||
// Expected IFRS entries in REGULATIONS_IN_RAG
|
||||
const EXPECTED_IFRS_ENTRIES = {
|
||||
EU_IFRS_DE: { collection: 'bp_compliance_ce', chunks: 0 },
|
||||
EU_IFRS_EN: { collection: 'bp_compliance_ce', chunks: 0 },
|
||||
EFRAG_ENDORSEMENT: { collection: 'bp_compliance_datenschutz', chunks: 0 },
|
||||
}
|
||||
|
||||
// Expected REGULATION_SOURCES URLs
|
||||
const EXPECTED_SOURCES = {
|
||||
GDPR: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32016R0679',
|
||||
EU_IFRS_DE: 'https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX:32023R1803',
|
||||
EU_IFRS_EN: 'https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32023R1803',
|
||||
EFRAG_ENDORSEMENT: 'https://www.efrag.org/activities/endorsement-status-report',
|
||||
ENISA_SECURE_DEV: 'https://www.enisa.europa.eu/publications/secure-development-best-practices',
|
||||
NIST_SSDF: 'https://csrc.nist.gov/pubs/sp/800/218/final',
|
||||
NIST_CSF: 'https://www.nist.gov/cyberframework',
|
||||
OECD_AI: 'https://oecd.ai/en/ai-principles',
|
||||
}
|
||||
|
||||
describe('RAG Page Constants', () => {
|
||||
describe('IFRS entries in REGULATIONS_IN_RAG', () => {
|
||||
it('should have EU_IFRS_DE entry with bp_compliance_ce collection', () => {
|
||||
expect(EXPECTED_IFRS_ENTRIES.EU_IFRS_DE.collection).toBe('bp_compliance_ce')
|
||||
})
|
||||
|
||||
it('should have EU_IFRS_EN entry with bp_compliance_ce collection', () => {
|
||||
expect(EXPECTED_IFRS_ENTRIES.EU_IFRS_EN.collection).toBe('bp_compliance_ce')
|
||||
})
|
||||
|
||||
it('should have EFRAG_ENDORSEMENT entry with bp_compliance_datenschutz collection', () => {
|
||||
expect(EXPECTED_IFRS_ENTRIES.EFRAG_ENDORSEMENT.collection).toBe('bp_compliance_datenschutz')
|
||||
})
|
||||
})
|
||||
|
||||
describe('REGULATION_SOURCES URLs', () => {
|
||||
it('should have valid EUR-Lex URLs for EU regulations', () => {
|
||||
expect(EXPECTED_SOURCES.GDPR).toMatch(/^https:\/\/eur-lex\.europa\.eu/)
|
||||
expect(EXPECTED_SOURCES.EU_IFRS_DE).toMatch(/^https:\/\/eur-lex\.europa\.eu/)
|
||||
expect(EXPECTED_SOURCES.EU_IFRS_EN).toMatch(/^https:\/\/eur-lex\.europa\.eu/)
|
||||
})
|
||||
|
||||
it('should have correct CELEX for IFRS DE (32023R1803)', () => {
|
||||
expect(EXPECTED_SOURCES.EU_IFRS_DE).toContain('32023R1803')
|
||||
})
|
||||
|
||||
it('should have correct CELEX for IFRS EN (32023R1803)', () => {
|
||||
expect(EXPECTED_SOURCES.EU_IFRS_EN).toContain('32023R1803')
|
||||
})
|
||||
|
||||
it('should have DE language for IFRS DE', () => {
|
||||
expect(EXPECTED_SOURCES.EU_IFRS_DE).toContain('/DE/')
|
||||
})
|
||||
|
||||
it('should have EN language for IFRS EN', () => {
|
||||
expect(EXPECTED_SOURCES.EU_IFRS_EN).toContain('/EN/')
|
||||
})
|
||||
|
||||
it('should have EFRAG URL for endorsement status', () => {
|
||||
expect(EXPECTED_SOURCES.EFRAG_ENDORSEMENT).toMatch(/^https:\/\/www\.efrag\.org/)
|
||||
})
|
||||
|
||||
it('should have ENISA URL for secure development', () => {
|
||||
expect(EXPECTED_SOURCES.ENISA_SECURE_DEV).toMatch(/^https:\/\/www\.enisa\.europa\.eu/)
|
||||
})
|
||||
|
||||
it('should have NIST URLs for SSDF and CSF', () => {
|
||||
expect(EXPECTED_SOURCES.NIST_SSDF).toMatch(/nist\.gov/)
|
||||
expect(EXPECTED_SOURCES.NIST_CSF).toMatch(/nist\.gov/)
|
||||
})
|
||||
|
||||
it('should have OECD URL for AI principles', () => {
|
||||
expect(EXPECTED_SOURCES.OECD_AI).toMatch(/oecd\.ai/)
|
||||
})
|
||||
|
||||
it('should all be valid HTTPS URLs', () => {
|
||||
Object.values(EXPECTED_SOURCES).forEach((url) => {
|
||||
expect(url).toMatch(/^https:\/\//)
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
249
admin-lehrer/app/api/legal-corpus/__tests__/route.test.ts
Normal file
249
admin-lehrer/app/api/legal-corpus/__tests__/route.test.ts
Normal file
@@ -0,0 +1,249 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
|
||||
// Mock fetch globally
|
||||
const mockFetch = vi.fn()
|
||||
global.fetch = mockFetch
|
||||
|
||||
// Mock NextRequest and NextResponse
|
||||
vi.mock('next/server', () => ({
|
||||
NextRequest: class MockNextRequest {
|
||||
url: string
|
||||
constructor(url: string) {
|
||||
this.url = url
|
||||
}
|
||||
},
|
||||
NextResponse: {
|
||||
json: (data: unknown, init?: { status?: number }) => ({
|
||||
data,
|
||||
status: init?.status || 200,
|
||||
}),
|
||||
},
|
||||
}))
|
||||
|
||||
describe('Legal Corpus API Proxy', () => {
|
||||
beforeEach(() => {
|
||||
mockFetch.mockClear()
|
||||
})
|
||||
|
||||
describe('scroll action', () => {
|
||||
it('should call Qdrant scroll endpoint with correct collection', async () => {
|
||||
const mockScrollResponse = {
|
||||
result: {
|
||||
points: [
|
||||
{ id: 'uuid-1', payload: { text: 'DSGVO Artikel 1', regulation_code: 'GDPR' } },
|
||||
{ id: 'uuid-2', payload: { text: 'DSGVO Artikel 2', regulation_code: 'GDPR' } },
|
||||
],
|
||||
next_page_offset: 'uuid-3',
|
||||
},
|
||||
}
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockScrollResponse),
|
||||
})
|
||||
|
||||
const { GET } = await import('../route')
|
||||
const request = { url: 'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce&limit=20' }
|
||||
const response = await GET(request as any)
|
||||
|
||||
expect(mockFetch).toHaveBeenCalledTimes(1)
|
||||
const calledUrl = mockFetch.mock.calls[0][0]
|
||||
expect(calledUrl).toContain('/collections/bp_compliance_ce/points/scroll')
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body)
|
||||
expect(body.limit).toBe(20)
|
||||
expect(body.with_payload).toBe(true)
|
||||
expect(body.with_vector).toBe(false)
|
||||
})
|
||||
|
||||
it('should pass offset parameter to Qdrant', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ result: { points: [], next_page_offset: null } }),
|
||||
})
|
||||
|
||||
const { GET } = await import('../route')
|
||||
const request = { url: 'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_gesetze&offset=some-uuid' }
|
||||
await GET(request as any)
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body)
|
||||
expect(body.offset).toBe('some-uuid')
|
||||
})
|
||||
|
||||
it('should limit chunks to max 100', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ result: { points: [], next_page_offset: null } }),
|
||||
})
|
||||
|
||||
const { GET } = await import('../route')
|
||||
const request = { url: 'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce&limit=500' }
|
||||
await GET(request as any)
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body)
|
||||
expect(body.limit).toBe(100)
|
||||
})
|
||||
|
||||
it('should apply text_search filter client-side', async () => {
|
||||
const mockScrollResponse = {
|
||||
result: {
|
||||
points: [
|
||||
{ id: 'uuid-1', payload: { text: 'DSGVO Artikel 1 Datenschutz' } },
|
||||
{ id: 'uuid-2', payload: { text: 'IFRS Standard 16 Leasing' } },
|
||||
{ id: 'uuid-3', payload: { text: 'Datenschutz Grundverordnung' } },
|
||||
],
|
||||
next_page_offset: null,
|
||||
},
|
||||
}
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockScrollResponse),
|
||||
})
|
||||
|
||||
const { GET } = await import('../route')
|
||||
const request = { url: 'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce&text_search=Datenschutz' }
|
||||
const response = await GET(request as any)
|
||||
|
||||
// Should filter to only chunks containing "Datenschutz"
|
||||
expect((response as any).data.chunks).toHaveLength(2)
|
||||
expect((response as any).data.chunks[0].text).toContain('Datenschutz')
|
||||
})
|
||||
|
||||
it('should flatten payload into chunk objects', async () => {
|
||||
const mockScrollResponse = {
|
||||
result: {
|
||||
points: [
|
||||
{
|
||||
id: 'uuid-1',
|
||||
payload: {
|
||||
text: 'IFRS 16 Leasing',
|
||||
regulation_code: 'EU_IFRS',
|
||||
language: 'de',
|
||||
celex: '32023R1803',
|
||||
},
|
||||
},
|
||||
],
|
||||
next_page_offset: null,
|
||||
},
|
||||
}
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockScrollResponse),
|
||||
})
|
||||
|
||||
const { GET } = await import('../route')
|
||||
const request = { url: 'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce' }
|
||||
const response = await GET(request as any)
|
||||
|
||||
const chunk = (response as any).data.chunks[0]
|
||||
expect(chunk.id).toBe('uuid-1')
|
||||
expect(chunk.text).toBe('IFRS 16 Leasing')
|
||||
expect(chunk.regulation_code).toBe('EU_IFRS')
|
||||
expect(chunk.language).toBe('de')
|
||||
})
|
||||
|
||||
it('should return next_offset from Qdrant response', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({
|
||||
result: { points: [], next_page_offset: 'next-uuid' },
|
||||
}),
|
||||
})
|
||||
|
||||
const { GET } = await import('../route')
|
||||
const request = { url: 'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce' }
|
||||
const response = await GET(request as any)
|
||||
|
||||
expect((response as any).data.next_offset).toBe('next-uuid')
|
||||
})
|
||||
|
||||
it('should handle Qdrant scroll failure', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status: 404,
|
||||
})
|
||||
|
||||
const { GET } = await import('../route')
|
||||
const request = { url: 'http://localhost/api/legal-corpus?action=scroll&collection=nonexistent' }
|
||||
const response = await GET(request as any)
|
||||
|
||||
expect((response as any).status).toBe(404)
|
||||
})
|
||||
|
||||
it('should apply filter when filter_key and filter_value provided', async () => {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ result: { points: [], next_page_offset: null } }),
|
||||
})
|
||||
|
||||
const { GET } = await import('../route')
|
||||
const request = { url: 'http://localhost/api/legal-corpus?action=scroll&collection=bp_compliance_ce&filter_key=language&filter_value=de' }
|
||||
await GET(request as any)
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body)
|
||||
expect(body.filter).toEqual({
|
||||
must: [{ key: 'language', match: { value: 'de' } }],
|
||||
})
|
||||
})
|
||||
|
||||
// Without a `collection` query parameter the scroll must target bp_compliance_gesetze.
it('should default collection to bp_compliance_gesetze', async () => {
  const emptyPage = { result: { points: [], next_page_offset: null } }
  mockFetch.mockResolvedValueOnce({
    ok: true,
    json: () => Promise.resolve(emptyPage),
  })

  const route = await import('../route')
  await route.GET({ url: 'http://localhost/api/legal-corpus?action=scroll' } as any)

  // First argument of the first fetch call is the Qdrant URL.
  const [qdrantUrl] = mockFetch.mock.calls[0]
  expect(qdrantUrl).toContain('/collections/bp_compliance_gesetze/')
})
|
||||
})
|
||||
|
||||
// Tests for `?action=collection-count`, which reads points_count from the Qdrant collection info.
describe('collection-count action', () => {
  it('should return points_count from Qdrant collection info', async () => {
    const collectionInfo = { result: { points_count: 55053 } }
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: () => Promise.resolve(collectionInfo),
    })

    const route = await import('../route')
    const res = await route.GET({
      url: 'http://localhost/api/legal-corpus?action=collection-count&collection=bp_compliance_ce',
    } as any)

    expect((res as any).data.count).toBe(55053)
  })

  it('should return 0 when Qdrant is unavailable', async () => {
    // A non-OK upstream response is reported as a count of zero.
    const qdrantFailure = { ok: false, status: 500 }
    mockFetch.mockResolvedValueOnce(qdrantFailure)

    const route = await import('../route')
    const res = await route.GET({
      url: 'http://localhost/api/legal-corpus?action=collection-count&collection=bp_compliance_ce',
    } as any)

    expect((res as any).data.count).toBe(0)
  })

  it('should default to bp_compliance_gesetze collection', async () => {
    const collectionInfo = { result: { points_count: 1234 } }
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: () => Promise.resolve(collectionInfo),
    })

    const route = await import('../route')
    await route.GET({ url: 'http://localhost/api/legal-corpus?action=collection-count' } as any)

    // First argument of the first fetch call is the Qdrant URL.
    const [qdrantUrl] = mockFetch.mock.calls[0]
    expect(qdrantUrl).toContain('/collections/bp_compliance_gesetze')
  })
})
|
||||
})
|
||||
@@ -66,6 +66,99 @@ export async function GET(request: NextRequest) {
|
||||
url += `/traceability?chunk_id=${encodeURIComponent(chunkId || '')}&regulation=${encodeURIComponent(regulation || '')}`
|
||||
break
|
||||
}
|
||||
case 'scroll': {
|
||||
const collection = searchParams.get('collection') || 'bp_compliance_gesetze'
|
||||
const limit = parseInt(searchParams.get('limit') || '20', 10)
|
||||
const offsetParam = searchParams.get('offset')
|
||||
const filterKey = searchParams.get('filter_key')
|
||||
const filterValue = searchParams.get('filter_value')
|
||||
const textSearch = searchParams.get('text_search')
|
||||
|
||||
const scrollBody: Record<string, unknown> = {
|
||||
limit: Math.min(limit, 100),
|
||||
with_payload: true,
|
||||
with_vector: false,
|
||||
}
|
||||
if (offsetParam) {
|
||||
scrollBody.offset = offsetParam
|
||||
}
|
||||
if (filterKey && filterValue) {
|
||||
scrollBody.filter = {
|
||||
must: [{ key: filterKey, match: { value: filterValue } }],
|
||||
}
|
||||
}
|
||||
|
||||
const scrollRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(collection)}/points/scroll`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(scrollBody),
|
||||
cache: 'no-store',
|
||||
})
|
||||
if (!scrollRes.ok) {
|
||||
return NextResponse.json({ error: 'Qdrant scroll failed' }, { status: scrollRes.status })
|
||||
}
|
||||
const scrollData = await scrollRes.json()
|
||||
const points = (scrollData.result?.points || []).map((p: { id: string; payload?: Record<string, unknown> }) => ({
|
||||
id: p.id,
|
||||
...p.payload,
|
||||
}))
|
||||
|
||||
// Client-side text search filter
|
||||
let filtered = points
|
||||
if (textSearch && textSearch.trim()) {
|
||||
const term = textSearch.toLowerCase()
|
||||
filtered = points.filter((p: Record<string, unknown>) => {
|
||||
const text = String(p.text || p.content || p.chunk_text || '')
|
||||
return text.toLowerCase().includes(term)
|
||||
})
|
||||
}
|
||||
|
||||
return NextResponse.json({
|
||||
chunks: filtered,
|
||||
next_offset: scrollData.result?.next_page_offset || null,
|
||||
total_in_page: points.length,
|
||||
})
|
||||
}
|
||||
case 'regulation-counts-batch': {
|
||||
const col = searchParams.get('collection') || 'bp_compliance_gesetze'
|
||||
// Accept qdrant_ids (actual regulation_id values in Qdrant payload)
|
||||
const qdrantIds = (searchParams.get('qdrant_ids') || '').split(',').filter(Boolean)
|
||||
const results: Record<string, number> = {}
|
||||
for (let i = 0; i < qdrantIds.length; i += 10) {
|
||||
const batch = qdrantIds.slice(i, i + 10)
|
||||
await Promise.all(batch.map(async (qid) => {
|
||||
try {
|
||||
const res = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(col)}/points/count`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
filter: { must: [{ key: 'regulation_id', match: { value: qid } }] },
|
||||
exact: true,
|
||||
}),
|
||||
cache: 'no-store',
|
||||
})
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
results[qid] = data.result?.count || 0
|
||||
}
|
||||
} catch { /* skip failed counts */ }
|
||||
}))
|
||||
}
|
||||
return NextResponse.json({ counts: results })
|
||||
}
|
||||
case 'collection-count': {
|
||||
const col = searchParams.get('collection') || 'bp_compliance_gesetze'
|
||||
const countRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(col)}`, {
|
||||
cache: 'no-store',
|
||||
})
|
||||
if (!countRes.ok) {
|
||||
return NextResponse.json({ count: 0 })
|
||||
}
|
||||
const countData = await countRes.json()
|
||||
return NextResponse.json({
|
||||
count: countData.result?.points_count || 0,
|
||||
})
|
||||
}
|
||||
default:
|
||||
return NextResponse.json({ error: 'Unknown action' }, { status: 400 })
|
||||
}
|
||||
|
||||
@@ -1,8 +1,19 @@
|
||||
import type { Metadata } from 'next'
|
||||
import { Inter } from 'next/font/google'
|
||||
import localFont from 'next/font/local'
|
||||
import { Noto_Sans } from 'next/font/google'
|
||||
import './globals.css'
|
||||
|
||||
const inter = Inter({ subsets: ['latin'] })
|
||||
const inter = localFont({
|
||||
src: '../public/fonts/Inter-VariableFont.woff2',
|
||||
variable: '--font-inter',
|
||||
display: 'swap',
|
||||
})
|
||||
|
||||
const notoSans = Noto_Sans({
|
||||
subsets: ['latin', 'latin-ext'],
|
||||
variable: '--font-noto-sans',
|
||||
display: 'swap',
|
||||
})
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: 'BreakPilot Admin Lehrer KI',
|
||||
@@ -16,7 +27,7 @@ export default function RootLayout({
|
||||
}) {
|
||||
return (
|
||||
<html lang="de">
|
||||
<body className={inter.className}>{children}</body>
|
||||
<body className={`${inter.className} ${notoSans.variable}`}>{children}</body>
|
||||
</html>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
import Link from 'next/link'
|
||||
import { useState, useEffect } from 'react'
|
||||
|
||||
export type AIToolId = 'llm-compare' | 'test-quality' | 'gpu' | 'ocr-compare' | 'ocr-labeling' | 'rag-pipeline' | 'magic-help'
|
||||
export type AIToolId = 'test-quality' | 'gpu' | 'ocr-compare' | 'ocr-labeling' | 'rag-pipeline' | 'magic-help'
|
||||
|
||||
export interface AIToolModule {
|
||||
id: AIToolId
|
||||
@@ -25,13 +25,6 @@ export interface AIToolModule {
|
||||
}
|
||||
|
||||
export const AI_TOOLS_MODULES: AIToolModule[] = [
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
href: '/ai/llm-compare',
|
||||
description: 'KI-Provider vergleichen',
|
||||
icon: '⚖️',
|
||||
},
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
@@ -93,13 +86,6 @@ export interface AIToolsSidebarResponsiveProps extends AIToolsSidebarProps {
|
||||
// Icons für die Tools
|
||||
const ToolIcon = ({ id }: { id: string }) => {
|
||||
switch (id) {
|
||||
case 'llm-compare':
|
||||
return (
|
||||
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
|
||||
d="M3 6l3 1m0 0l-3 9a5.002 5.002 0 006.001 0M6 7l3 9M6 7l6-2m6 2l3-1m-3 1l-3 9a5.002 5.002 0 006.001 0M18 7l3 9m-3-9l-6-2m0-2v2m0 16V5m0 16H9m3 0h3" />
|
||||
</svg>
|
||||
)
|
||||
case 'test-quality':
|
||||
return (
|
||||
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -228,8 +214,6 @@ export function AIToolsSidebar({
|
||||
<div className="flex items-center gap-2 text-xs">
|
||||
<span title="GPU Infrastruktur">🖥️</span>
|
||||
<span className="text-slate-400">→</span>
|
||||
<span title="LLM Vergleich">⚖️</span>
|
||||
<span className="text-slate-400">→</span>
|
||||
<span title="Test Quality">🧪</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -241,9 +225,6 @@ export function AIToolsSidebar({
|
||||
{/* Quick Info zum aktuellen Tool */}
|
||||
<div className="pt-2 border-t border-slate-200 dark:border-gray-700">
|
||||
<div className="text-xs text-slate-500 dark:text-slate-400 px-1">
|
||||
{currentTool === 'llm-compare' && (
|
||||
<span>Vergleichen Sie LLM-Antworten verschiedener Provider</span>
|
||||
)}
|
||||
{currentTool === 'test-quality' && (
|
||||
<span>Ueberwachen Sie die Qualitaet der KI-Ausgaben</span>
|
||||
)}
|
||||
@@ -387,11 +368,6 @@ export function AIToolsSidebarResponsive({
|
||||
<span className="text-xs text-slate-500 mt-1">GPU</span>
|
||||
</div>
|
||||
<span className="text-slate-400">→</span>
|
||||
<div className="flex flex-col items-center">
|
||||
<span className="text-2xl">⚖️</span>
|
||||
<span className="text-xs text-slate-500 mt-1">LLM</span>
|
||||
</div>
|
||||
<span className="text-slate-400">→</span>
|
||||
<div className="flex flex-col items-center">
|
||||
<span className="text-2xl">🧪</span>
|
||||
<span className="text-xs text-slate-500 mt-1">BQAS</span>
|
||||
@@ -405,11 +381,6 @@ export function AIToolsSidebarResponsive({
|
||||
{/* Quick Info */}
|
||||
<div className="pt-4 border-t border-slate-200 dark:border-gray-700">
|
||||
<div className="text-sm text-slate-600 dark:text-slate-400 p-3 bg-slate-50 dark:bg-gray-800 rounded-xl">
|
||||
{currentTool === 'llm-compare' && (
|
||||
<>
|
||||
<strong className="text-slate-700 dark:text-slate-300">Aktuell:</strong> LLM-Antworten verschiedener Provider vergleichen
|
||||
</>
|
||||
)}
|
||||
{currentTool === 'test-quality' && (
|
||||
<>
|
||||
<strong className="text-slate-700 dark:text-slate-300">Aktuell:</strong> Qualitaet der KI-Ausgaben ueberwachen
|
||||
|
||||
278
admin-lehrer/components/grid-editor/GridEditor.tsx
Normal file
278
admin-lehrer/components/grid-editor/GridEditor.tsx
Normal file
@@ -0,0 +1,278 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { useGridEditor } from './useGridEditor'
|
||||
import type { GridZone } from './types'
|
||||
import { GridToolbar } from './GridToolbar'
|
||||
import { GridTable } from './GridTable'
|
||||
import { GridImageOverlay } from './GridImageOverlay'
|
||||
|
||||
/** Props accepted by the GridEditor component. */
interface GridEditorProps {
  /** Session whose grid is edited; when null only a placeholder message is rendered. */
  sessionId: string | null
  /** Optional callback for the "Fertig" button; the button is rendered only when provided. */
  onNext?: () => void
}

/** Swatch colours for the per-colour counts in the summary bar, keyed by colour name. */
const COLOR_STAT_SWATCHES: Record<string, string> = {
  red: '#dc2626',
  blue: '#2563eb',
  green: '#16a34a',
  orange: '#ea580c',
  purple: '#9333ea',
  yellow: '#ca8a04',
}

/** Swatch used for colour names that have no entry in COLOR_STAT_SWATCHES. */
const DEFAULT_COLOR_STAT_SWATCH = '#6b7280'

/**
 * Editor view for the grid of one session.
 *
 * Loads the grid when a session id is present, registers global keyboard
 * shortcuts (undo, redo, save) and renders, depending on state, a placeholder,
 * a loading spinner, an error box with retry, an empty state with a build
 * button, or the full editor (summary bar, toolbar, optional image overlay,
 * one table per zone, shortcut hints and the optional "Fertig" button).
 *
 * @param sessionId Session to edit, or null when none is selected.
 * @param onNext    Optional callback invoked by the "Fertig" button after
 *                  unsaved changes have been saved.
 */
export function GridEditor({ sessionId, onNext }: GridEditorProps) {
  const {
    grid,
    loading,
    saving,
    error,
    dirty,
    selectedCell,
    setSelectedCell,
    buildGrid,
    loadGrid,
    saveGrid,
    updateCellText,
    toggleColumnBold,
    toggleRowHeader,
    undo,
    redo,
    canUndo,
    canRedo,
    getAdjacentCell,
  } = useGridEditor(sessionId)

  const [showOverlay, setShowOverlay] = useState(false)

  // Load the grid whenever a session becomes available.
  useEffect(() => {
    if (sessionId) {
      loadGrid()
    }
  }, [sessionId, loadGrid])

  // Global keyboard shortcuts: Ctrl/Cmd+Z undo, Ctrl/Cmd+Shift+Z or Ctrl/Cmd+Y redo,
  // Ctrl/Cmd+S save.
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (!(e.metaKey || e.ctrlKey)) return

      // KeyboardEvent.key reflects Shift/CapsLock ("Z" instead of "z"), so the
      // comparison has to be case-insensitive or the Shift+Z redo never matches.
      const key = (e.key || '').toLowerCase()

      if (key === 'z' && !e.shiftKey) {
        e.preventDefault()
        undo()
      } else if ((key === 'z' && e.shiftKey) || key === 'y') {
        // Ctrl+Y is advertised in the hint row below, so it is handled as redo too.
        e.preventDefault()
        redo()
      } else if (key === 's') {
        e.preventDefault()
        saveGrid()
      }
    }
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  }, [undo, redo, saveGrid])

  // Move the selection to the neighbouring cell and focus its element.
  const handleNavigate = useCallback(
    (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => {
      const target = getAdjacentCell(cellId, direction)
      if (target) {
        setSelectedCell(target)
        // Defer focusing by one tick so it runs after the current event handling.
        setTimeout(() => {
          const el = document.getElementById(`cell-${target}`)
          if (el) {
            el.focus()
            if (el instanceof HTMLInputElement) el.select()
          }
        }, 0)
      }
    },
    [getAdjacentCell, setSelectedCell],
  )

  if (!sessionId) {
    return (
      <div className="text-center py-12 text-gray-400">
        Keine Session ausgewaehlt.
      </div>
    )
  }

  if (loading) {
    return (
      <div className="flex items-center justify-center py-16">
        <div className="flex items-center gap-3 text-gray-500 dark:text-gray-400">
          <svg className="w-5 h-5 animate-spin" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
          Grid wird aufgebaut...
        </div>
      </div>
    )
  }

  if (error) {
    return (
      <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-4">
        <p className="text-sm text-red-700 dark:text-red-300">
          Fehler: {error}
        </p>
        <button
          onClick={buildGrid}
          className="mt-2 text-xs px-3 py-1.5 bg-red-600 text-white rounded hover:bg-red-700"
        >
          Erneut versuchen
        </button>
      </div>
    )
  }

  if (!grid || !grid.zones.length) {
    return (
      <div className="text-center py-12">
        <p className="text-gray-400 mb-4">Kein Grid vorhanden.</p>
        <button
          onClick={buildGrid}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 text-sm"
        >
          Grid aus OCR-Ergebnissen erstellen
        </button>
      </div>
    )
  }

  // Group consecutive zones that share the same non-null vsplit_group so they
  // can be laid out side by side; every other zone forms a group of its own.
  const zoneGroups: GridZone[][] = []
  for (const zone of grid.zones) {
    const prev = zoneGroups[zoneGroups.length - 1]
    if (
      prev &&
      zone.vsplit_group != null &&
      prev[0].vsplit_group === zone.vsplit_group
    ) {
      prev.push(zone)
    } else {
      zoneGroups.push([zone])
    }
  }

  // Single place where a zone table is wired to the editor state and callbacks.
  const renderZoneTable = (zone: GridZone) => (
    <GridTable
      zone={zone}
      layoutMetrics={grid.layout_metrics}
      selectedCell={selectedCell}
      onSelectCell={setSelectedCell}
      onCellTextChange={updateCellText}
      onToggleColumnBold={toggleColumnBold}
      onToggleRowHeader={toggleRowHeader}
      onNavigate={handleNavigate}
    />
  )

  return (
    <div className="space-y-4">
      {/* Summary bar */}
      <div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
        <span>{grid.summary.total_zones} Zone(n)</span>
        <span>{grid.summary.total_columns} Spalten</span>
        <span>{grid.summary.total_rows} Zeilen</span>
        <span>{grid.summary.total_cells} Zellen</span>
        {grid.boxes_detected > 0 && (
          <span className="text-amber-600 dark:text-amber-400">
            {grid.boxes_detected} Box(en) erkannt
          </span>
        )}
        {grid.summary.color_stats && Object.entries(grid.summary.color_stats)
          .filter(([name]) => name !== 'black')
          .map(([name, count]) => (
            <span key={name} className="inline-flex items-center gap-1">
              <span
                className="w-2 h-2 rounded-full"
                style={{ backgroundColor: COLOR_STAT_SWATCHES[name] || DEFAULT_COLOR_STAT_SWATCH }}
              />
              <span>{count} {name}</span>
            </span>
          ))
        }
        {(grid.summary.recovered_colored ?? 0) > 0 && (
          <span className="text-purple-600 dark:text-purple-400">
            +{grid.summary.recovered_colored} recovered
          </span>
        )}
        <span className="text-gray-400">
          {grid.duration_seconds.toFixed(1)}s
        </span>
      </div>

      {/* Toolbar */}
      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2">
        <GridToolbar
          dirty={dirty}
          saving={saving}
          canUndo={canUndo}
          canRedo={canRedo}
          showOverlay={showOverlay}
          onSave={saveGrid}
          onUndo={undo}
          onRedo={redo}
          onRebuild={buildGrid}
          onToggleOverlay={() => setShowOverlay((visible) => !visible)}
        />
      </div>

      {/* Image overlay */}
      {showOverlay && (
        <GridImageOverlay sessionId={sessionId} grid={grid} />
      )}

      {/* Zone tables — vsplit groups are rendered side by side */}
      <div className="space-y-4">
        {zoneGroups.map((group) =>
          group.length === 1 ? (
            <div
              key={group[0].zone_index}
              className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden"
            >
              {renderZoneTable(group[0])}
            </div>
          ) : (
            <div
              key={`vsplit-${group[0].vsplit_group}`}
              className="flex gap-2"
            >
              {group.map((zone) => (
                <div
                  key={zone.zone_index}
                  className="flex-1 min-w-0 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden"
                >
                  {renderZoneTable(zone)}
                </div>
              ))}
            </div>
          ),
        )}
      </div>

      {/* Tip */}
      <div className="text-[11px] text-gray-400 dark:text-gray-500 flex items-center gap-4">
        <span>Tab: naechste Zelle</span>
        <span>Enter: Zeile runter</span>
        <span>Spalte fett: Klick auf Spaltenkopf</span>
        <span>Header: Klick auf Zeilennummer</span>
        <span>Ctrl+Z/Y: Undo/Redo</span>
        <span>Ctrl+S: Speichern</span>
      </div>

      {/* Next step button */}
      {onNext && (
        <div className="flex justify-end">
          <button
            onClick={async () => {
              // Persist pending edits before handing control to the caller.
              if (dirty) await saveGrid()
              onNext()
            }}
            className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700 transition-colors"
          >
            Fertig
          </button>
        </div>
      )}
    </div>
  )
}
|
||||
98
admin-lehrer/components/grid-editor/GridImageOverlay.tsx
Normal file
98
admin-lehrer/components/grid-editor/GridImageOverlay.tsx
Normal file
@@ -0,0 +1,98 @@
|
||||
'use client'
|
||||
|
||||
import type { StructuredGrid } from './types'
|
||||
|
||||
/** URL prefix used to build the session image URL. */
const KLAUSUR_API = '/klausur-api'

/** Props accepted by the GridImageOverlay component. */
interface GridImageOverlayProps {
  /** Session id interpolated into the cropped-image URL. */
  sessionId: string
  /** Grid whose zones, columns and rows are drawn on top of the image. */
  grid: StructuredGrid
}

/** Stroke/fill pairs; a zone picks its entry by zone_index modulo the list length. */
const ZONE_COLORS = [
  { border: 'rgba(20,184,166,0.7)', fill: 'rgba(20,184,166,0.05)' }, // teal
  { border: 'rgba(245,158,11,0.7)', fill: 'rgba(245,158,11,0.05)' }, // amber
  { border: 'rgba(99,102,241,0.7)', fill: 'rgba(99,102,241,0.05)' }, // indigo
  { border: 'rgba(236,72,153,0.7)', fill: 'rgba(236,72,153,0.05)' }, // pink
]

/**
 * Shows the cropped session image with an SVG layer on top that outlines each
 * zone, its column and row separators, and a short zone label.
 *
 * The SVG uses the grid's image_width/image_height as its viewBox, so zone
 * coordinates are passed through unchanged.
 */
export function GridImageOverlay({ sessionId, grid }: GridImageOverlayProps) {
  const croppedImageSrc = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
  const svgViewBox = `0 0 ${grid.image_width} ${grid.image_height}`

  const zoneLayers = grid.zones.map((zone) => {
    const palette = ZONE_COLORS[zone.zone_index % ZONE_COLORS.length]
    const { x, y, w, h } = zone.bbox_px
    const isBox = zone.zone_type === 'box'
    const rightEdge = x + w
    const bottomEdge = y + h

    return (
      <g key={zone.zone_index}>
        {/* Outline: solid and thicker for box zones, dashed otherwise */}
        <rect
          x={x}
          y={y}
          width={w}
          height={h}
          fill={palette.fill}
          stroke={palette.border}
          strokeWidth={isBox ? 3 : 1.5}
          strokeDasharray={isBox ? undefined : '6 3'}
        />

        {/* Vertical separators at the left edge of every column but the first */}
        {zone.columns.slice(1).map((col) => (
          <line
            key={`col-${col.index}`}
            x1={col.x_min_px}
            y1={y}
            x2={col.x_min_px}
            y2={bottomEdge}
            stroke={palette.border}
            strokeWidth={1}
            strokeDasharray="4 2"
          />
        ))}

        {/* Horizontal separators at the top edge of every row but the first */}
        {zone.rows.slice(1).map((row) => (
          <line
            key={`row-${row.index}`}
            x1={x}
            y1={row.y_min_px}
            x2={rightEdge}
            y2={row.y_min_px}
            stroke={palette.border}
            strokeWidth={0.5}
            strokeDasharray="3 3"
            opacity={0.5}
          />
        ))}

        {/* Label: zone kind, index and columns x rows */}
        <text
          x={x + 4}
          y={y + 14}
          fill={palette.border}
          fontSize={12}
          fontWeight="bold"
          fontFamily="monospace"
        >
          {isBox ? 'BOX' : 'CONTENT'} Z{zone.zone_index}
          {' '}({zone.columns.length}x{zone.rows.length})
        </text>
      </g>
    )
  })

  return (
    <div className="relative w-full overflow-auto border border-gray-200 dark:border-gray-700 rounded-lg bg-gray-100 dark:bg-gray-900">
      <div className="relative inline-block">
        {/* Source image */}
        {/* eslint-disable-next-line @next/next/no-img-element */}
        <img
          src={croppedImageSrc}
          alt="OCR Scan"
          className="block max-w-full"
          style={{ imageRendering: 'auto' }}
        />

        {/* SVG overlay */}
        <svg
          className="absolute inset-0 w-full h-full pointer-events-none"
          viewBox={svgViewBox}
          preserveAspectRatio="xMinYMin meet"
        >
          {zoneLayers}
        </svg>
      </div>
    </div>
  )
}
|
||||
447
admin-lehrer/components/grid-editor/GridTable.tsx
Normal file
447
admin-lehrer/components/grid-editor/GridTable.tsx
Normal file
@@ -0,0 +1,447 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { GridZone, LayoutMetrics } from './types'
|
||||
|
||||
/** Props accepted by the GridTable component. */
interface GridTableProps {
|
||||
/** Zone rendered by this table; its columns, rows and cells are read for layout and lookup. */
zone: GridZone
|
||||
/** Optional measurements (average row height, font-size suggestion, page width); fallbacks apply when absent. */
layoutMetrics?: LayoutMetrics
|
||||
/** Presumably the id of the currently selected cell, or null when none is selected — TODO confirm against usage. */
selectedCell: string | null
|
||||
/** Receives a cell id; GridEditor passes its setSelectedCell here. */
onSelectCell: (cellId: string) => void
|
||||
/** Receives a cell id and text; GridEditor passes its updateCellText here. */
onCellTextChange: (cellId: string, text: string) => void
|
||||
/** Receives a zone index and column index; GridEditor passes its toggleColumnBold here. */
onToggleColumnBold: (zoneIndex: number, colIndex: number) => void
|
||||
/** Receives a zone index and row index; GridEditor passes its toggleRowHeader here. */
onToggleRowHeader: (zoneIndex: number, rowIndex: number) => void
|
||||
/** Called by the key handler: Tab/Shift+Tab move right/left, Enter moves down, Alt+ArrowUp/ArrowDown move up/down. */
onNavigate: (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => void
|
||||
}
|
||||
|
||||
/** Gutter width for row numbers (px). */
|
||||
const ROW_NUM_WIDTH = 36
|
||||
|
||||
/** Minimum column width in px so columns remain usable. */
|
||||
const MIN_COL_WIDTH = 40
|
||||
|
||||
/** Minimum row height in px. */
|
||||
const MIN_ROW_HEIGHT = 26
|
||||
|
||||
export function GridTable({
|
||||
zone,
|
||||
layoutMetrics,
|
||||
selectedCell,
|
||||
onSelectCell,
|
||||
onCellTextChange,
|
||||
onToggleColumnBold,
|
||||
onToggleRowHeader,
|
||||
onNavigate,
|
||||
}: GridTableProps) {
|
||||
const containerRef = useRef<HTMLDivElement>(null)
|
||||
const [containerWidth, setContainerWidth] = useState(0)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Observe container width for scaling
|
||||
// ----------------------------------------------------------------
|
||||
useEffect(() => {
|
||||
const el = containerRef.current
|
||||
if (!el) return
|
||||
const ro = new ResizeObserver(([entry]) => {
|
||||
setContainerWidth(entry.contentRect.width)
|
||||
})
|
||||
ro.observe(el)
|
||||
return () => ro.disconnect()
|
||||
}, [])
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Compute column widths from OCR measurements
|
||||
// ----------------------------------------------------------------
|
||||
// Use the actual total column span as reference width — NOT zone.bbox_px.w.
|
||||
// When union columns are applied across content zones, column boundaries
|
||||
// can extend beyond the zone's bbox, causing overflow if we scale by
|
||||
// the smaller zone width.
|
||||
const [colWidthOverrides, setColWidthOverrides] = useState<number[] | null>(null)
|
||||
|
||||
const columnWidthsPx = zone.columns.map((col) => col.x_max_px - col.x_min_px)
|
||||
const totalColWidthPx = columnWidthsPx.reduce((sum, w) => sum + w, 0)
|
||||
const zoneWidthPx = totalColWidthPx > 0
|
||||
? totalColWidthPx
|
||||
: (zone.bbox_px.w || layoutMetrics?.page_width_px || 1)
|
||||
const scale = containerWidth > 0 ? (containerWidth - ROW_NUM_WIDTH) / zoneWidthPx : 1
|
||||
|
||||
const effectiveColWidths = (colWidthOverrides ?? columnWidthsPx).map(
|
||||
(w) => Math.max(MIN_COL_WIDTH, w * scale),
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Compute row heights from OCR measurements
|
||||
// ----------------------------------------------------------------
|
||||
const avgRowHeightPx = layoutMetrics?.avg_row_height_px ?? 30
|
||||
const [rowHeightOverrides, setRowHeightOverrides] = useState<Map<number, number>>(new Map())
|
||||
|
||||
const getRowHeight = (rowIndex: number, isHeader: boolean): number => {
|
||||
if (rowHeightOverrides.has(rowIndex)) {
|
||||
return rowHeightOverrides.get(rowIndex)!
|
||||
}
|
||||
const row = zone.rows.find((r) => r.index === rowIndex)
|
||||
if (!row) return Math.max(MIN_ROW_HEIGHT, avgRowHeightPx * scale)
|
||||
|
||||
if (isHeader) {
|
||||
// Headers keep their measured height
|
||||
const measuredH = row.y_max_px - row.y_min_px
|
||||
return Math.max(MIN_ROW_HEIGHT, measuredH * scale)
|
||||
}
|
||||
// Content rows use average for uniformity
|
||||
return Math.max(MIN_ROW_HEIGHT, avgRowHeightPx * scale)
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Font size from layout metrics
|
||||
// ----------------------------------------------------------------
|
||||
const baseFontSize = layoutMetrics?.font_size_suggestion_px
|
||||
? Math.max(11, layoutMetrics.font_size_suggestion_px * scale)
|
||||
: 13
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Keyboard navigation
|
||||
// ----------------------------------------------------------------
|
||||
const handleKeyDown = useCallback(
|
||||
(e: React.KeyboardEvent, cellId: string) => {
|
||||
if (e.key === 'Tab') {
|
||||
e.preventDefault()
|
||||
onNavigate(cellId, e.shiftKey ? 'left' : 'right')
|
||||
} else if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault()
|
||||
onNavigate(cellId, 'down')
|
||||
} else if (e.key === 'ArrowUp' && e.altKey) {
|
||||
e.preventDefault()
|
||||
onNavigate(cellId, 'up')
|
||||
} else if (e.key === 'ArrowDown' && e.altKey) {
|
||||
e.preventDefault()
|
||||
onNavigate(cellId, 'down')
|
||||
} else if (e.key === 'Escape') {
|
||||
;(e.target as HTMLElement).blur()
|
||||
}
|
||||
},
|
||||
[onNavigate],
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Cell lookup
|
||||
// ----------------------------------------------------------------
|
||||
const cellMap = new Map<string, (typeof zone.cells)[0]>()
|
||||
for (const cell of zone.cells) {
|
||||
cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
|
||||
}
|
||||
|
||||
/** Dominant non-black color from a cell's word_boxes, or null. */
|
||||
const getCellColor = (cell: (typeof zone.cells)[0] | undefined): string | null => {
|
||||
if (!cell?.word_boxes?.length) return null
|
||||
for (const wb of cell.word_boxes) {
|
||||
if (wb.color_name && wb.color_name !== 'black' && wb.color) {
|
||||
return wb.color
|
||||
}
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Column resize (drag)
|
||||
// ----------------------------------------------------------------
|
||||
const handleColResizeStart = useCallback(
|
||||
(colIndex: number, startX: number) => {
|
||||
const baseWidths = colWidthOverrides ?? [...columnWidthsPx]
|
||||
|
||||
const handleMouseMove = (e: MouseEvent) => {
|
||||
const deltaPx = (e.clientX - startX) / scale
|
||||
const newWidths = [...baseWidths]
|
||||
newWidths[colIndex] = Math.max(20, baseWidths[colIndex] + deltaPx)
|
||||
// Steal from next column to keep total constant
|
||||
if (colIndex + 1 < newWidths.length) {
|
||||
newWidths[colIndex + 1] = Math.max(20, baseWidths[colIndex + 1] - deltaPx)
|
||||
}
|
||||
setColWidthOverrides(newWidths)
|
||||
}
|
||||
|
||||
const handleMouseUp = () => {
|
||||
document.removeEventListener('mousemove', handleMouseMove)
|
||||
document.removeEventListener('mouseup', handleMouseUp)
|
||||
document.body.style.cursor = ''
|
||||
document.body.style.userSelect = ''
|
||||
}
|
||||
|
||||
document.body.style.cursor = 'col-resize'
|
||||
document.body.style.userSelect = 'none'
|
||||
document.addEventListener('mousemove', handleMouseMove)
|
||||
document.addEventListener('mouseup', handleMouseUp)
|
||||
},
|
||||
[colWidthOverrides, columnWidthsPx, scale],
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Row resize (drag)
|
||||
// ----------------------------------------------------------------
|
||||
const handleRowResizeStart = useCallback(
|
||||
(rowIndex: number, startY: number, currentHeight: number) => {
|
||||
const handleMouseMove = (e: MouseEvent) => {
|
||||
const delta = e.clientY - startY
|
||||
const newH = Math.max(MIN_ROW_HEIGHT, currentHeight + delta)
|
||||
setRowHeightOverrides((prev) => {
|
||||
const next = new Map(prev)
|
||||
next.set(rowIndex, newH)
|
||||
return next
|
||||
})
|
||||
}
|
||||
|
||||
const handleMouseUp = () => {
|
||||
document.removeEventListener('mousemove', handleMouseMove)
|
||||
document.removeEventListener('mouseup', handleMouseUp)
|
||||
document.body.style.cursor = ''
|
||||
document.body.style.userSelect = ''
|
||||
}
|
||||
|
||||
document.body.style.cursor = 'row-resize'
|
||||
document.body.style.userSelect = 'none'
|
||||
document.addEventListener('mousemove', handleMouseMove)
|
||||
document.addEventListener('mouseup', handleMouseUp)
|
||||
},
|
||||
[],
|
||||
)
|
||||
|
||||
const isBoxZone = zone.zone_type === 'box'
|
||||
const numCols = zone.columns.length
|
||||
|
||||
// CSS Grid template for columns: row-number gutter + proportional columns
|
||||
const gridTemplateCols = `${ROW_NUM_WIDTH}px ${effectiveColWidths.map((w) => `${w.toFixed(1)}px`).join(' ')}`
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={containerRef}
|
||||
className={`overflow-x-auto ${isBoxZone ? 'border-2 border-gray-400 dark:border-gray-500 rounded-lg' : ''}`}
|
||||
>
|
||||
{/* Zone label */}
|
||||
<div className="flex items-center gap-2 px-2 py-1 text-xs text-gray-500 dark:text-gray-400">
|
||||
<span
|
||||
className={`inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-[10px] font-medium ${
|
||||
isBoxZone
|
||||
? 'bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-800'
|
||||
: 'bg-gray-50 dark:bg-gray-800 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-700'
|
||||
}`}
|
||||
>
|
||||
{isBoxZone ? 'Box' : 'Inhalt'} Zone {zone.zone_index}
|
||||
</span>
|
||||
<span>
|
||||
{zone.columns.length} Spalten, {zone.rows.length} Zeilen, {zone.cells.length} Zellen
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* ============================================================ */}
|
||||
{/* CSS Grid — column headers */}
|
||||
{/* ============================================================ */}
|
||||
<div
|
||||
style={{
|
||||
display: 'grid',
|
||||
gridTemplateColumns: gridTemplateCols,
|
||||
fontFamily: "var(--font-noto-sans, 'Noto Sans'), 'Inter', system-ui, sans-serif",
|
||||
fontSize: `${baseFontSize}px`,
|
||||
}}
|
||||
>
|
||||
{/* Header: row-number corner */}
|
||||
<div className="sticky left-0 z-10 px-1 py-1.5 text-[10px] text-gray-400 dark:text-gray-500 border-b border-r border-gray-200 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50" />
|
||||
|
||||
{/* Header: column labels with resize handles */}
|
||||
{zone.columns.map((col, ci) => (
|
||||
<div
|
||||
key={col.index}
|
||||
className={`relative px-2 py-1.5 text-xs font-medium border-b border-r border-gray-200 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
|
||||
col.bold ? 'text-teal-700 dark:text-teal-300' : 'text-gray-600 dark:text-gray-400'
|
||||
}`}
|
||||
onClick={() => onToggleColumnBold(zone.zone_index, col.index)}
|
||||
title={`Spalte ${col.index + 1} — Klick fuer Fett-Toggle`}
|
||||
>
|
||||
<div className="flex items-center gap-1 justify-center truncate">
|
||||
<span>{col.label}</span>
|
||||
{col.bold && (
|
||||
<span className="text-[9px] px-1 py-0 rounded bg-teal-100 dark:bg-teal-900/40 text-teal-600 dark:text-teal-400">
|
||||
B
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{/* Right-edge resize handle */}
|
||||
{ci < numCols - 1 && (
|
||||
<div
|
||||
className="absolute top-0 right-0 w-[5px] h-full cursor-col-resize hover:bg-teal-400/40 z-20"
|
||||
onMouseDown={(e) => {
|
||||
e.stopPropagation()
|
||||
handleColResizeStart(ci, e.clientX)
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
|
||||
{/* ============================================================ */}
|
||||
{/* Data rows */}
|
||||
{/* ============================================================ */}
|
||||
{zone.rows.map((row) => {
|
||||
const rowH = getRowHeight(row.index, row.is_header)
|
||||
const isSpanning = zone.cells.some(
|
||||
(c) => c.row_index === row.index && c.col_type === 'spanning_header',
|
||||
)
|
||||
|
||||
return (
|
||||
<div key={row.index} style={{ display: 'contents' }}>
|
||||
{/* Row number cell */}
|
||||
<div
|
||||
className={`relative sticky left-0 z-10 flex items-center justify-center text-[10px] border-b border-r border-gray-200 dark:border-gray-700 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
|
||||
row.is_header
|
||||
? 'bg-blue-50 dark:bg-blue-900/20 text-blue-600 dark:text-blue-400 font-medium'
|
||||
: row.is_footer
|
||||
? 'bg-amber-50 dark:bg-amber-900/20 text-amber-600 dark:text-amber-400 font-medium'
|
||||
: 'bg-gray-50 dark:bg-gray-800/50 text-gray-400 dark:text-gray-500'
|
||||
}`}
|
||||
style={{ height: `${rowH}px` }}
|
||||
onClick={() => onToggleRowHeader(zone.zone_index, row.index)}
|
||||
title={`Zeile ${row.index + 1} — Klick fuer Header-Toggle`}
|
||||
>
|
||||
{row.index + 1}
|
||||
{row.is_header && <span className="block text-[8px]">H</span>}
|
||||
{row.is_footer && <span className="block text-[8px]">F</span>}
|
||||
{/* Bottom-edge resize handle */}
|
||||
<div
|
||||
className="absolute bottom-0 left-0 w-full h-[4px] cursor-row-resize hover:bg-teal-400/40 z-20"
|
||||
onMouseDown={(e) => {
|
||||
e.stopPropagation()
|
||||
handleRowResizeStart(row.index, e.clientY, rowH)
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Cells — spanning header or normal columns */}
|
||||
{isSpanning ? (
|
||||
<div
|
||||
className="border-b border-r border-gray-200 dark:border-gray-700 bg-blue-50/50 dark:bg-blue-900/10 flex items-center"
|
||||
style={{
|
||||
gridColumn: `2 / ${numCols + 2}`,
|
||||
height: `${rowH}px`,
|
||||
}}
|
||||
>
|
||||
{(() => {
|
||||
const spanCell = zone.cells.find(
|
||||
(c) => c.row_index === row.index && c.col_type === 'spanning_header',
|
||||
)
|
||||
if (!spanCell) return null
|
||||
const cellId = spanCell.cell_id
|
||||
const isSelected = selectedCell === cellId
|
||||
const cellColor = getCellColor(spanCell)
|
||||
return (
|
||||
<div className="flex items-center w-full">
|
||||
{cellColor && (
|
||||
<span
|
||||
className="flex-shrink-0 w-1.5 self-stretch rounded-l-sm"
|
||||
style={{ backgroundColor: cellColor }}
|
||||
/>
|
||||
)}
|
||||
<input
|
||||
id={`cell-${cellId}`}
|
||||
type="text"
|
||||
value={spanCell.text}
|
||||
onChange={(e) => onCellTextChange(cellId, e.target.value)}
|
||||
onFocus={() => onSelectCell(cellId)}
|
||||
onKeyDown={(e) => handleKeyDown(e, cellId)}
|
||||
className={`w-full px-3 py-1 bg-transparent border-0 outline-none text-center ${
|
||||
isSelected ? 'ring-2 ring-teal-500 ring-inset rounded' : ''
|
||||
}`}
|
||||
style={{ color: cellColor || undefined }}
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
) : (
|
||||
zone.columns.map((col) => {
|
||||
const cell = cellMap.get(`${row.index}_${col.index}`)
|
||||
const cellId =
|
||||
cell?.cell_id ??
|
||||
`Z${zone.zone_index}_R${String(row.index).padStart(2, '0')}_C${col.index}`
|
||||
const isSelected = selectedCell === cellId
|
||||
const isBold = col.bold || cell?.is_bold
|
||||
const isLowConf = cell && cell.confidence > 0 && cell.confidence < 60
|
||||
const cellColor = getCellColor(cell)
|
||||
// Show per-word colored display only when word_boxes
|
||||
// match the cell text. Post-processing steps (e.g. 5h
|
||||
// slash-IPA → bracket conversion) modify cell.text but
|
||||
// not individual word_boxes, so we fall back to the
|
||||
// plain input when they diverge.
|
||||
const wbText = cell?.word_boxes?.map((wb) => wb.text).join(' ') ?? ''
|
||||
const textMatches = !cell?.text || wbText === cell.text
|
||||
const hasColoredWords =
|
||||
textMatches &&
|
||||
(cell?.word_boxes?.some(
|
||||
(wb) => wb.color_name && wb.color_name !== 'black',
|
||||
) ?? false)
|
||||
|
||||
return (
|
||||
<div
|
||||
key={col.index}
|
||||
className={`relative border-b border-r border-gray-200 dark:border-gray-700 flex items-center ${
|
||||
isSelected ? 'ring-2 ring-teal-500 ring-inset z-10' : ''
|
||||
} ${isLowConf ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''} ${
|
||||
row.is_header ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''
|
||||
}`}
|
||||
style={{ height: `${rowH}px` }}
|
||||
>
|
||||
{cellColor && (
|
||||
<span
|
||||
className="flex-shrink-0 w-1.5 self-stretch rounded-l-sm"
|
||||
style={{ backgroundColor: cellColor }}
|
||||
title={`Farbe: ${cell?.word_boxes?.find((wb) => wb.color_name !== 'black')?.color_name}`}
|
||||
/>
|
||||
)}
|
||||
{/* Per-word colored display when not editing */}
|
||||
{hasColoredWords && !isSelected ? (
|
||||
<div
|
||||
className={`w-full px-2 cursor-text truncate ${isBold ? 'font-bold' : 'font-normal'}`}
|
||||
onClick={() => {
|
||||
onSelectCell(cellId)
|
||||
setTimeout(() => document.getElementById(`cell-${cellId}`)?.focus(), 0)
|
||||
}}
|
||||
>
|
||||
{cell!.word_boxes!.map((wb, i) => (
|
||||
<span
|
||||
key={i}
|
||||
style={
|
||||
wb.color_name && wb.color_name !== 'black'
|
||||
? { color: wb.color }
|
||||
: undefined
|
||||
}
|
||||
>
|
||||
{wb.text}
|
||||
{i < cell!.word_boxes!.length - 1 ? ' ' : ''}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
<input
|
||||
id={`cell-${cellId}`}
|
||||
type="text"
|
||||
value={cell?.text ?? ''}
|
||||
onChange={(e) => {
|
||||
if (cell) onCellTextChange(cellId, e.target.value)
|
||||
}}
|
||||
onFocus={() => onSelectCell(cellId)}
|
||||
onKeyDown={(e) => handleKeyDown(e, cellId)}
|
||||
className={`w-full px-2 bg-transparent border-0 outline-none ${
|
||||
isBold ? 'font-bold' : 'font-normal'
|
||||
}`}
|
||||
spellCheck={false}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
110
admin-lehrer/components/grid-editor/GridToolbar.tsx
Normal file
110
admin-lehrer/components/grid-editor/GridToolbar.tsx
Normal file
@@ -0,0 +1,110 @@
|
||||
'use client'
|
||||
|
||||
/** Props accepted by the grid editor toolbar. */
interface GridToolbarProps {
  /** True when there are unsaved changes; enables the save button and switches its label. */
  dirty: boolean
  /** True while a save is in flight; disables the save button and shows the spinner. */
  saving: boolean
  /** Enables the undo button. */
  canUndo: boolean
  /** Enables the redo button. */
  canRedo: boolean
  /** Whether the image overlay toggle is rendered in its active style. */
  showOverlay: boolean
  /** Invoked when the save button is clicked. */
  onSave: () => void
  /** Invoked when the undo button is clicked. */
  onUndo: () => void
  /** Invoked when the redo button is clicked. */
  onRedo: () => void
  /** Invoked when the "Neu berechnen" button is clicked. */
  onRebuild: () => void
  /** Invoked when the "Bild-Overlay" toggle is clicked. */
  onToggleOverlay: () => void
}
|
||||
|
||||
/**
 * Toolbar for the grid editor: undo/redo, image-overlay toggle, rebuild and save.
 *
 * The component is purely presentational; every piece of state and every
 * handler is supplied through props.
 *
 * All buttons declare `type="button"` so that they never act as submit
 * buttons if the toolbar is rendered inside a `<form>`. Icon-only buttons
 * carry an `aria-label`, the overlay toggle exposes its state through
 * `aria-pressed`, and decorative icons are hidden from assistive technology.
 */
export function GridToolbar({
  dirty,
  saving,
  canUndo,
  canRedo,
  showOverlay,
  onSave,
  onUndo,
  onRedo,
  onRebuild,
  onToggleOverlay,
}: GridToolbarProps) {
  return (
    <div className="flex items-center gap-2 flex-wrap">
      {/* Undo / Redo */}
      <div className="flex items-center gap-1 border-r border-gray-200 dark:border-gray-700 pr-2">
        <button
          type="button"
          onClick={onUndo}
          disabled={!canUndo}
          className="p-1.5 rounded hover:bg-gray-100 dark:hover:bg-gray-700 disabled:opacity-30 disabled:cursor-not-allowed"
          title="Rueckgaengig (Ctrl+Z)"
          aria-label="Rueckgaengig"
        >
          <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
            <path strokeLinecap="round" strokeLinejoin="round" d="M3 10h10a5 5 0 015 5v2M3 10l4-4M3 10l4 4" />
          </svg>
        </button>
        <button
          type="button"
          onClick={onRedo}
          disabled={!canRedo}
          className="p-1.5 rounded hover:bg-gray-100 dark:hover:bg-gray-700 disabled:opacity-30 disabled:cursor-not-allowed"
          title="Wiederholen (Ctrl+Shift+Z)"
          aria-label="Wiederholen"
        >
          <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
            <path strokeLinecap="round" strokeLinejoin="round" d="M21 10H11a5 5 0 00-5 5v2M21 10l-4-4M21 10l-4 4" />
          </svg>
        </button>
      </div>

      {/* Overlay toggle */}
      <button
        type="button"
        onClick={onToggleOverlay}
        aria-pressed={showOverlay}
        className={`flex items-center gap-1 px-2.5 py-1.5 text-xs rounded-md border transition-colors ${
          showOverlay
            ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-300 dark:border-teal-700 text-teal-700 dark:text-teal-300'
            : 'border-gray-200 dark:border-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700'
        }`}
        title="Grid auf Bild anzeigen"
      >
        <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
          <path strokeLinecap="round" strokeLinejoin="round" d="M4 5a1 1 0 011-1h14a1 1 0 011 1v2a1 1 0 01-1 1H5a1 1 0 01-1-1V5zM4 13a1 1 0 011-1h6a1 1 0 011 1v6a1 1 0 01-1 1H5a1 1 0 01-1-1v-6zM16 13a1 1 0 011-1h2a1 1 0 011 1v6a1 1 0 01-1 1h-2a1 1 0 01-1-1v-6z" />
        </svg>
        Bild-Overlay
      </button>

      {/* Rebuild */}
      <button
        type="button"
        onClick={onRebuild}
        className="flex items-center gap-1 px-2.5 py-1.5 text-xs rounded-md border border-gray-200 dark:border-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors"
        title="Grid neu berechnen"
      >
        <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
          <path strokeLinecap="round" strokeLinejoin="round" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
        </svg>
        Neu berechnen
      </button>

      {/* Spacer pushes the save button to the right edge */}
      <div className="flex-1" />

      {/* Save: only clickable when there is something to save and no save is running */}
      <button
        type="button"
        onClick={onSave}
        disabled={!dirty || saving}
        className={`flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
          dirty
            ? 'bg-teal-600 text-white hover:bg-teal-700'
            : 'bg-gray-100 dark:bg-gray-800 text-gray-400 cursor-not-allowed'
        }`}
        title="Speichern (Ctrl+S)"
      >
        {saving ? (
          <svg className="w-3.5 h-3.5 animate-spin" fill="none" viewBox="0 0 24 24" aria-hidden="true">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
        ) : (
          <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2} aria-hidden="true">
            <path strokeLinecap="round" strokeLinejoin="round" d="M8 7H5a2 2 0 00-2 2v9a2 2 0 002 2h14a2 2 0 002-2V9a2 2 0 00-2-2h-3m-1 4l-3 3m0 0l-3-3m3 3V4" />
          </svg>
        )}
        {saving ? 'Speichert...' : dirty ? 'Speichern' : 'Gespeichert'}
      </button>
    </div>
  )
}
|
||||
6
admin-lehrer/components/grid-editor/index.ts
Normal file
6
admin-lehrer/components/grid-editor/index.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
export { GridEditor } from './GridEditor'
|
||||
export { GridTable } from './GridTable'
|
||||
export { GridToolbar } from './GridToolbar'
|
||||
export { GridImageOverlay } from './GridImageOverlay'
|
||||
export { useGridEditor } from './useGridEditor'
|
||||
export type * from './types'
|
||||
111
admin-lehrer/components/grid-editor/types.ts
Normal file
111
admin-lehrer/components/grid-editor/types.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import type { OcrWordBox } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
// Re-export for convenience
|
||||
export type { OcrWordBox }
|
||||
|
||||
/**
 * Layout metrics derived from OCR word positions for faithful grid reconstruction.
 *
 * All four values carry a `_px` suffix, i.e. they are presumably pixel measures
 * (confirm the reference image against the backend).
 */
export interface LayoutMetrics {
  /** Width of the page. */
  page_width_px: number
  /** Height of the page. */
  page_height_px: number
  /** Average height of a grid row. */
  avg_row_height_px: number
  /** Suggested font size for rendering the grid. */
  font_size_suggestion_px: number
}
|
||||
|
||||
/**
 * A complete structured grid with zones, ready for the Excel-like editor.
 *
 * This is the payload exchanged with the backend for one OCR session.
 */
export interface StructuredGrid {
  /** Identifier of the OCR session this grid belongs to. */
  session_id: string
  /** Width of the source image. */
  image_width: number
  /** Height of the source image. */
  image_height: number
  /** The zones that make up the page, each with its own columns, rows and cells. */
  zones: GridZone[]
  /** Number of boxes detected on the page. */
  boxes_detected: number
  /** Aggregate counts over all zones. */
  summary: GridSummary
  /** Grid-wide formatting information. */
  formatting: GridFormatting
  /** Optional layout metrics; may be absent. */
  layout_metrics?: LayoutMetrics
  /** Duration reported alongside the grid, in seconds per the field name. */
  duration_seconds: number
  /** Optional flag; presumably set once the grid has been edited — confirm with backend. */
  edited?: boolean
}
|
||||
|
||||
/** Aggregate counters describing a structured grid. */
export interface GridSummary {
  /** Number of zones. */
  total_zones: number
  /** Number of columns. */
  total_columns: number
  /** Number of rows. */
  total_rows: number
  /** Number of cells. */
  total_cells: number
  /** Number of words. */
  total_words: number
  /** Optional count of recovered colored items (exact semantics defined by the backend). */
  recovered_colored?: number
  /** Optional string-keyed counters; presumably keyed by color name — TODO confirm. */
  color_stats?: Record<string, number>
}
|
||||
|
||||
/** Grid-wide formatting information. */
export interface GridFormatting {
  /** Numeric identifiers of bold columns (presumably column indices — confirm). */
  bold_columns: number[]
  /** Numeric identifiers of header rows (presumably row indices — confirm). */
  header_rows: number[]
}
|
||||
|
||||
/** A horizontal zone of the page — either content or a bordered box. */
export interface GridZone {
  /** Index identifying this zone within the grid. */
  zone_index: number
  /** Discriminates plain content zones from bordered boxes. */
  zone_type: 'content' | 'box'
  /** Bounding box of the zone (pixel variant per the `_px` suffix). */
  bbox_px: BBox
  /** Bounding box of the zone (percentage variant per the `_pct` suffix). */
  bbox_pct: BBox
  /** Border description, or `null` when there is none. */
  border: ZoneBorder | null
  /** Number of words in the zone. */
  word_count: number
  /** Columns of the zone. */
  columns: GridColumn[]
  /** Rows of the zone. */
  rows: GridRow[]
  /** Cells of the zone; each cell refers to its row and column by index. */
  cells: GridEditorCell[]
  /** Numeric identifiers of header rows (presumably row indices — confirm). */
  header_rows: number[]
  /** Optional hint about the zone's position relative to a vertical split. */
  layout_hint?: 'left_of_vsplit' | 'right_of_vsplit' | 'middle_of_vsplit'
  /** Optional group number; presumably ties together zones of one vertical split — confirm. */
  vsplit_group?: number
}
|
||||
|
||||
/**
 * Axis-aligned rectangle given by an origin and a size.
 *
 * The unit depends on the field the box is stored in (`*_px` vs. `*_pct`).
 */
export interface BBox {
  /** Horizontal position. */
  x: number
  /** Vertical position. */
  y: number
  /** Width. */
  w: number
  /** Height. */
  h: number
}
|
||||
|
||||
/** Describes the detected border of a zone. */
export interface ZoneBorder {
  /** Border thickness (unit not specified here). */
  thickness: number
  /** Confidence value for the border (scale not specified here). */
  confidence: number
}
|
||||
|
||||
/** One column of a zone. */
export interface GridColumn {
  /** Column index; cells refer to it through `col_index`. */
  index: number
  /** Label shown in the column header. */
  label: string
  /** Left edge (pixel variant per the `_px` suffix). */
  x_min_px: number
  /** Right edge (pixel variant per the `_px` suffix). */
  x_max_px: number
  /** Left edge (percentage variant per the `_pct` suffix). */
  x_min_pct: number
  /** Right edge (percentage variant per the `_pct` suffix). */
  x_max_pct: number
  /** Whether the column is rendered bold. */
  bold: boolean
}
|
||||
|
||||
/** One row of a zone. */
export interface GridRow {
  /** Row index; cells refer to it through `row_index`. */
  index: number
  /** Top edge (pixel variant per the `_px` suffix). */
  y_min_px: number
  /** Bottom edge (pixel variant per the `_px` suffix). */
  y_max_px: number
  /** Top edge (percentage variant per the `_pct` suffix). */
  y_min_pct: number
  /** Bottom edge (percentage variant per the `_pct` suffix). */
  y_max_pct: number
  /** Whether the row is a header row. */
  is_header: boolean
  /** Whether the row is a footer row; may be absent. */
  is_footer?: boolean
}
|
||||
|
||||
/** One editable cell of a zone. */
export interface GridEditorCell {
  /** Identifier of the cell; used to address it for edits and selection. */
  cell_id: string
  /** Index of the zone the cell belongs to. */
  zone_index: number
  /** Index of the row the cell sits in. */
  row_index: number
  /** Index of the column the cell sits in. */
  col_index: number
  /** Free-form column type tag (e.g. `'spanning_header'`). */
  col_type: string
  /** Text content of the cell. */
  text: string
  /** Confidence value of the recognised text (scale defined by the backend). */
  confidence: number
  /** Bounding box of the cell (pixel variant per the `_px` suffix). */
  bbox_px: BBox
  /** Bounding box of the cell (percentage variant per the `_pct` suffix). */
  bbox_pct: BBox
  /** Per-word boxes that make up the cell. */
  word_boxes: OcrWordBox[]
  /** Name of the OCR engine associated with the cell. */
  ocr_engine: string
  /** Whether the cell is rendered bold. */
  is_bold: boolean
}
|
||||
|
||||
/** Cell formatting applied by the user in the editor. */
export interface CellFormatting {
  /** Whether the cell text is bold. */
  bold: boolean
  /** Font size preset. */
  fontSize: 'small' | 'normal' | 'large'
  /** Horizontal text alignment. */
  align: 'left' | 'center' | 'right'
}
|
||||
288
admin-lehrer/components/grid-editor/useGridEditor.ts
Normal file
288
admin-lehrer/components/grid-editor/useGridEditor.ts
Normal file
@@ -0,0 +1,288 @@
|
||||
import { useCallback, useRef, useState } from 'react'
|
||||
import type { StructuredGrid, GridZone } from './types'
|
||||
|
||||
// Path prefix prepended to every backend request issued by this hook.
const KLAUSUR_API = '/klausur-api'

// Maximum number of undo snapshots kept; the oldest one is dropped beyond this.
const MAX_UNDO = 50
|
||||
|
||||
/** Shape of the state tracked by the grid editor. */
export interface GridEditorState {
  /** The grid being edited, or `null` when none is loaded. */
  grid: StructuredGrid | null
  /** True while a grid is being loaded or built. */
  loading: boolean
  /** True while a save request is in flight. */
  saving: boolean
  /** Message of the most recent failure, or `null`. */
  error: string | null
  /** True when there are unsaved changes. */
  dirty: boolean
  /** `cell_id` of the selected cell, or `null`. */
  selectedCell: string | null
  /** Index of the selected zone, or `null`. */
  selectedZone: number | null
}
|
||||
|
||||
/**
 * Turns a failed HTTP response into a thrown Error.
 *
 * Uses the `detail` field of the JSON body when present and falls back to
 * `HTTP <status>` when the body is missing, not JSON, or has no detail.
 */
async function throwHttpError(res: Response): Promise<never> {
  const data = await res.json().catch(() => ({}))
  throw new Error(data.detail || `HTTP ${res.status}`)
}

/**
 * React hook that owns the grid editor state for one OCR session.
 *
 * It loads or builds the structured grid from the backend, applies cell and
 * formatting edits immutably, keeps bounded undo/redo history of the zones,
 * and saves the grid back.
 *
 * @param sessionId Session whose grid is edited. While it is `null`,
 *   `buildGrid`, `loadGrid` and `saveGrid` are no-ops.
 * @returns The current state (`grid`, `loading`, `saving`, `error`, `dirty`,
 *   `selectedCell`, `selectedZone`), the selection setters, the async actions
 *   (`buildGrid`, `loadGrid`, `saveGrid`), the edit actions (`updateCellText`,
 *   `toggleColumnBold`, `toggleRowHeader`), history controls (`undo`, `redo`,
 *   `canUndo`, `canRedo`) and the `getAdjacentCell` navigation helper.
 */
export function useGridEditor(sessionId: string | null) {
  const [grid, setGrid] = useState<StructuredGrid | null>(null)
  const [loading, setLoading] = useState(false)
  const [saving, setSaving] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [dirty, setDirty] = useState(false)
  const [selectedCell, setSelectedCell] = useState<string | null>(null)
  const [selectedZone, setSelectedZone] = useState<number | null>(null)

  // Mirrors the grid of the most recent render so that an in-flight save can
  // tell whether the grid changed while the request was running.
  const latestGrid = useRef<StructuredGrid | null>(grid)
  latestGrid.current = grid

  // Undo/redo stacks store serialized zone arrays
  const undoStack = useRef<string[]>([])
  const redoStack = useRef<string[]>([])

  /** Records a snapshot of `zones` for undo and invalidates the redo history. */
  const pushUndo = useCallback((zones: GridZone[]) => {
    undoStack.current.push(JSON.stringify(zones))
    if (undoStack.current.length > MAX_UNDO) {
      // Drop the oldest snapshot to keep the history bounded.
      undoStack.current.shift()
    }
    redoStack.current = []
  }, [])

  /** Installs a grid received from the backend as the new clean baseline. */
  const adoptGrid = useCallback((data: StructuredGrid) => {
    setGrid(data)
    setDirty(false)
    undoStack.current = []
    redoStack.current = []
  }, [])

  // ------------------------------------------------------------------
  // Load / Build
  // ------------------------------------------------------------------

  /** Asks the backend to (re)build the grid and replaces the local grid with the result. */
  const buildGrid = useCallback(async () => {
    if (!sessionId) return
    setLoading(true)
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`,
        { method: 'POST' },
      )
      if (!res.ok) await throwHttpError(res)
      const data: StructuredGrid = await res.json()
      adoptGrid(data)
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setLoading(false)
    }
  }, [sessionId, adoptGrid])

  /** Loads the stored grid; when the backend answers 404, builds it instead. */
  const loadGrid = useCallback(async () => {
    if (!sessionId) return
    setLoading(true)
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`,
      )
      if (res.status === 404) {
        // No grid yet — build it
        await buildGrid()
        return
      }
      if (!res.ok) await throwHttpError(res)
      const data: StructuredGrid = await res.json()
      adoptGrid(data)
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setLoading(false)
    }
  }, [sessionId, buildGrid, adoptGrid])

  // ------------------------------------------------------------------
  // Save
  // ------------------------------------------------------------------

  /** Posts the current grid to the backend. No-op without a session or grid. */
  const saveGrid = useCallback(async () => {
    if (!sessionId || !grid) return
    setSaving(true)
    // Clear any stale message, matching buildGrid/loadGrid, so a successful
    // save does not leave an old error on screen.
    setError(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/save-grid`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(grid),
        },
      )
      if (!res.ok) await throwHttpError(res)
      // Only the snapshot captured above was persisted. If the grid changed
      // while the request was in flight, those edits are still unsaved.
      if (latestGrid.current === grid) {
        setDirty(false)
      }
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setSaving(false)
    }
  }, [sessionId, grid])

  // ------------------------------------------------------------------
  // Cell editing
  // ------------------------------------------------------------------

  /** Sets the text of the cell with the given id (searched across all zones). */
  const updateCellText = useCallback(
    (cellId: string, newText: string) => {
      if (!grid) return
      pushUndo(grid.zones)

      setGrid((prev) => {
        if (!prev) return prev
        return {
          ...prev,
          zones: prev.zones.map((zone) => ({
            ...zone,
            cells: zone.cells.map((cell) =>
              cell.cell_id === cellId ? { ...cell, text: newText } : cell,
            ),
          })),
        }
      })
      setDirty(true)
    },
    [grid, pushUndo],
  )

  // ------------------------------------------------------------------
  // Column formatting
  // ------------------------------------------------------------------

  /**
   * Flips the bold flag of one column in one zone and applies the new value
   * to every cell of that column. An unknown column index is treated as
   * "not bold yet", so its cells become bold.
   */
  const toggleColumnBold = useCallback(
    (zoneIndex: number, colIndex: number) => {
      if (!grid) return
      pushUndo(grid.zones)

      setGrid((prev) => {
        if (!prev) return prev
        return {
          ...prev,
          zones: prev.zones.map((zone) => {
            if (zone.zone_index !== zoneIndex) return zone
            const col = zone.columns.find((c) => c.index === colIndex)
            const newBold = col ? !col.bold : true
            return {
              ...zone,
              columns: zone.columns.map((c) =>
                c.index === colIndex ? { ...c, bold: newBold } : c,
              ),
              cells: zone.cells.map((cell) =>
                cell.col_index === colIndex
                  ? { ...cell, is_bold: newBold }
                  : cell,
              ),
            }
          }),
        }
      })
      setDirty(true)
    },
    [grid, pushUndo],
  )

  // ------------------------------------------------------------------
  // Row formatting
  // ------------------------------------------------------------------

  /**
   * Flips `is_header` on one row of one zone. Only the row object is changed;
   * the zone's `header_rows` list is left untouched.
   */
  const toggleRowHeader = useCallback(
    (zoneIndex: number, rowIndex: number) => {
      if (!grid) return
      pushUndo(grid.zones)

      setGrid((prev) => {
        if (!prev) return prev
        return {
          ...prev,
          zones: prev.zones.map((zone) => {
            if (zone.zone_index !== zoneIndex) return zone
            return {
              ...zone,
              rows: zone.rows.map((r) =>
                r.index === rowIndex ? { ...r, is_header: !r.is_header } : r,
              ),
            }
          }),
        }
      })
      setDirty(true)
    },
    [grid, pushUndo],
  )

  // ------------------------------------------------------------------
  // Undo / Redo
  // ------------------------------------------------------------------

  /** Restores the most recent undo snapshot and makes the current zones redoable. */
  const undo = useCallback(() => {
    if (!grid || undoStack.current.length === 0) return
    redoStack.current.push(JSON.stringify(grid.zones))
    const prev = undoStack.current.pop()!
    setGrid((g) => (g ? { ...g, zones: JSON.parse(prev) } : g))
    setDirty(true)
  }, [grid])

  /** Re-applies the most recently undone snapshot. */
  const redo = useCallback(() => {
    if (!grid || redoStack.current.length === 0) return
    undoStack.current.push(JSON.stringify(grid.zones))
    const next = redoStack.current.pop()!
    setGrid((g) => (g ? { ...g, zones: JSON.parse(next) } : g))
    setDirty(true)
  }, [grid])

  // Derived on every render; each history change is accompanied by a state
  // update, which triggers the re-render that refreshes these flags.
  const canUndo = undoStack.current.length > 0
  const canRedo = redoStack.current.length > 0

  // ------------------------------------------------------------------
  // Navigation helpers
  // ------------------------------------------------------------------

  /**
   * Finds the cell next to `cellId` in the given direction.
   *
   * The search stays inside the zone that contains `cellId`. Returns `null`
   * when the cell is unknown or no cell exists at the neighbouring position.
   */
  const getAdjacentCell = useCallback(
    (cellId: string, direction: 'up' | 'down' | 'left' | 'right'): string | null => {
      if (!grid) return null
      for (const zone of grid.zones) {
        const cell = zone.cells.find((c) => c.cell_id === cellId)
        if (!cell) continue

        let targetRow = cell.row_index
        let targetCol = cell.col_index
        if (direction === 'up') targetRow--
        if (direction === 'down') targetRow++
        if (direction === 'left') targetCol--
        if (direction === 'right') targetCol++

        const target = zone.cells.find(
          (c) => c.row_index === targetRow && c.col_index === targetCol,
        )
        return target?.cell_id ?? null
      }
      return null
    },
    [grid],
  )

  return {
    grid,
    loading,
    saving,
    error,
    dirty,
    selectedCell,
    selectedZone,
    setSelectedCell,
    setSelectedZone,
    buildGrid,
    loadGrid,
    saveGrid,
    updateCellText,
    toggleColumnBold,
    toggleRowHeader,
    undo,
    redo,
    canUndo,
    canRedo,
    getAdjacentCell,
  }
}
|
||||
@@ -194,10 +194,8 @@ export function Sidebar({ onRoleChange }: SidebarProps) {
|
||||
{/* Categories */}
|
||||
<div className="px-2 space-y-1">
|
||||
{visibleCategories.map((category) => {
|
||||
const categoryHref = category.id === 'compliance-sdk' ? '/sdk' : `/${category.id}`
|
||||
const isCategoryActive = category.id === 'compliance-sdk'
|
||||
? category.modules.some(m => pathname.startsWith(m.href))
|
||||
: pathname.startsWith(categoryHref)
|
||||
const categoryHref = `/${category.id}`
|
||||
const isCategoryActive = pathname.startsWith(categoryHref)
|
||||
|
||||
return (
|
||||
<div key={category.id}>
|
||||
|
||||
231
admin-lehrer/components/ocr-overlay/KombiCompareStep.tsx
Normal file
231
admin-lehrer/components/ocr-overlay/KombiCompareStep.tsx
Normal file
@@ -0,0 +1,231 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { OverlayReconstruction } from './OverlayReconstruction'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/**
 * UI phase of the comparison step: `'idle'` before a run is started,
 * `'running'` while the engines are being called, `'compare'` once the
 * run has finished (successfully or not).
 */
type Phase =
  | 'idle'
  | 'running'
  | 'compare'
|
||||
|
||||
/**
 * Response of one kombi engine endpoint.
 *
 * Only the fields listed here are typed; the index signatures allow any
 * additional fields in the response.
 */
interface KombiResult {
  /** Recognised grid cells. */
  cells: GridCell[]
  /** Width of the processed image. */
  image_width: number
  /** Height of the processed image. */
  image_height: number
  /** Duration reported alongside the result, in seconds per the field name. */
  duration_seconds: number
  /** Aggregate counters for the run. */
  summary: {
    /** Number of cells. */
    total_cells: number
    /** Number of cells that are not empty. */
    non_empty_cells: number
    /** Number of merged words. */
    merged_words: number
    /** Any further summary fields. */
    [key: string]: unknown
  }
  /** Any further top-level fields. */
  [key: string]: unknown
}
|
||||
|
||||
/** Props of the kombi comparison step. */
interface KombiCompareStepProps {
  /** Session to run the engines for; the start button is disabled while it is `null`. */
  sessionId: string | null
  /** Callback supplied by the parent for advancing past this step. */
  onNext: () => void
}
|
||||
|
||||
export function KombiCompareStep({ sessionId, onNext }: KombiCompareStepProps) {
|
||||
const [phase, setPhase] = useState<Phase>('idle')
|
||||
const [error, setError] = useState('')
|
||||
const [paddleResult, setPaddleResult] = useState<KombiResult | null>(null)
|
||||
const [rapidResult, setRapidResult] = useState<KombiResult | null>(null)
|
||||
const [paddleStatus, setPaddleStatus] = useState<'pending' | 'running' | 'done' | 'error'>('pending')
|
||||
const [rapidStatus, setRapidStatus] = useState<'pending' | 'running' | 'done' | 'error'>('pending')
|
||||
|
||||
const runBothEngines = async () => {
|
||||
if (!sessionId) return
|
||||
setPhase('running')
|
||||
setError('')
|
||||
setPaddleStatus('running')
|
||||
setRapidStatus('running')
|
||||
setPaddleResult(null)
|
||||
setRapidResult(null)
|
||||
|
||||
const fetchEngine = async (
|
||||
endpoint: string,
|
||||
setResult: (r: KombiResult) => void,
|
||||
setStatus: (s: 'pending' | 'running' | 'done' | 'error') => void,
|
||||
) => {
|
||||
try {
|
||||
const res = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`,
|
||||
{ method: 'POST' },
|
||||
)
|
||||
if (!res.ok) {
|
||||
const body = await res.json().catch(() => ({}))
|
||||
throw new Error(body.detail || `HTTP ${res.status}`)
|
||||
}
|
||||
const data = await res.json()
|
||||
setResult(data)
|
||||
setStatus('done')
|
||||
} catch (e: unknown) {
|
||||
setStatus('error')
|
||||
throw e
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await Promise.all([
|
||||
fetchEngine('paddle-kombi', setPaddleResult, setPaddleStatus),
|
||||
fetchEngine('rapid-kombi', setRapidResult, setRapidStatus),
|
||||
])
|
||||
setPhase('compare')
|
||||
} catch (e: unknown) {
|
||||
// At least one failed — still show compare if the other succeeded
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
setPhase('compare')
|
||||
}
|
||||
}
|
||||
|
||||
if (phase === 'idle') {
|
||||
return (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-8 text-center">
|
||||
<div className="text-4xl mb-3">⚖️</div>
|
||||
<h3 className="text-lg font-semibold text-gray-800 dark:text-gray-200 mb-2">
|
||||
Kombi-Vergleich
|
||||
</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6 max-w-lg mx-auto">
|
||||
Beide Kombi-Modi (Paddle + Tesseract vs. RapidOCR + Tesseract) laufen parallel.
|
||||
Die Ergebnisse werden nebeneinander angezeigt, damit die Qualitaet direkt verglichen werden kann.
|
||||
</p>
|
||||
<button
|
||||
onClick={runBothEngines}
|
||||
disabled={!sessionId}
|
||||
className="px-5 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed font-medium"
|
||||
>
|
||||
Beide Kombi-Modi starten
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (phase === 'running' && !paddleResult && !rapidResult) {
|
||||
return (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-8">
|
||||
<div className="flex items-center justify-center gap-8">
|
||||
<EngineStatusCard label="Paddle + Tesseract" status={paddleStatus} />
|
||||
<EngineStatusCard label="RapidOCR + Tesseract" status={rapidStatus} />
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// compare phase
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{error && (
|
||||
<div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-3 text-sm text-red-700 dark:text-red-300">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex items-center justify-between">
|
||||
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Side-by-Side Vergleich
|
||||
</h3>
|
||||
<button
|
||||
onClick={() => { setPhase('idle'); setPaddleResult(null); setRapidResult(null) }}
|
||||
className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors"
|
||||
>
|
||||
Neu starten
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
{/* Left: Paddle-Kombi */}
|
||||
<div className="space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
🔀 Paddle + Tesseract
|
||||
</span>
|
||||
{paddleStatus === 'error' && (
|
||||
<span className="text-xs text-red-500">Fehler</span>
|
||||
)}
|
||||
</div>
|
||||
{paddleResult ? (
|
||||
<>
|
||||
<OverlayReconstruction
|
||||
sessionId={sessionId}
|
||||
onNext={() => {}}
|
||||
wordResultOverride={paddleResult}
|
||||
/>
|
||||
<StatsBar result={paddleResult} engine="Paddle-Kombi" />
|
||||
</>
|
||||
) : (
|
||||
<div className="bg-gray-50 dark:bg-gray-900 rounded-lg p-12 text-center text-sm text-gray-400">
|
||||
{paddleStatus === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Right: Rapid-Kombi */}
|
||||
<div className="space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
⚡ RapidOCR + Tesseract
|
||||
</span>
|
||||
{rapidStatus === 'error' && (
|
||||
<span className="text-xs text-red-500">Fehler</span>
|
||||
)}
|
||||
</div>
|
||||
{rapidResult ? (
|
||||
<>
|
||||
<OverlayReconstruction
|
||||
sessionId={sessionId}
|
||||
onNext={() => {}}
|
||||
wordResultOverride={rapidResult}
|
||||
/>
|
||||
<StatsBar result={rapidResult} engine="Rapid-Kombi" />
|
||||
</>
|
||||
) : (
|
||||
<div className="bg-gray-50 dark:bg-gray-900 rounded-lg p-12 text-center text-sm text-gray-400">
|
||||
{rapidStatus === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex justify-end">
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
|
||||
>
|
||||
Fertig
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Small card that shows an engine label next to a glyph for its run state.
 *
 * Recognised states: 'running' (spinner), 'done' (check mark), 'error' (cross)
 * and 'pending' (hollow circle). Any other status renders the label alone.
 */
function EngineStatusCard({ label, status }: { label: string; status: string }) {
  // Pick the single glyph matching the current status; unknown states get none.
  const renderIndicator = () => {
    switch (status) {
      case 'running':
        return <div className="w-5 h-5 border-2 border-teal-400 border-t-transparent rounded-full animate-spin" />
      case 'done':
        return <span className="text-green-500 text-lg">✓</span>
      case 'error':
        return <span className="text-red-500 text-lg">✗</span>
      case 'pending':
        return <span className="text-gray-400 text-lg">○</span>
      default:
        return null
    }
  }

  return (
    <div className="flex items-center gap-3 bg-gray-50 dark:bg-gray-900 rounded-lg px-5 py-4">
      {renderIndicator()}
      <span className="text-sm text-gray-700 dark:text-gray-300">{label}</span>
    </div>
  )
}
|
||||
|
||||
/**
 * One-line summary strip for a single engine result: engine name, merged word
 * count, filled/total cell counts and run duration (two decimals).
 * Missing summary fields and a missing duration are shown as 0.
 */
function StatsBar({ result, engine }: { result: KombiResult; engine: string }) {
  const stats = result.summary
  const filledCells = stats?.non_empty_cells ?? 0
  const allCells = stats?.total_cells ?? 0
  const wordCount = stats?.merged_words ?? 0
  const seconds = (result.duration_seconds ?? 0).toFixed(2)

  return (
    <div className="flex items-center gap-3 text-[11px] text-gray-500 dark:text-gray-400 bg-gray-50 dark:bg-gray-900 rounded-lg px-3 py-2">
      <span className="font-medium text-gray-600 dark:text-gray-300">{engine}</span>
      <span>{wordCount} Woerter</span>
      <span>{filledCells}/{allCells} Zellen</span>
      <span>{seconds}s</span>
    </div>
  )
}
|
||||
644
admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx
Normal file
644
admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx
Normal file
@@ -0,0 +1,644 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import type { GridResult, GridCell, RowResult, RowItem } from '@/app/(admin)/ai/ocr-overlay/types'
|
||||
import { usePixelWordPositions } from './usePixelWordPositions'
|
||||
import { useSlideWordPositions } from './useSlideWordPositions'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props accepted by the OverlayReconstruction component. */
interface OverlayReconstructionProps {
  /**
   * Session whose cropped image and stored word result are displayed.
   * When null the component only renders a "select a session first" hint.
   */
  sessionId: string | null
  /** Called by the "Fertig" and "Ueberspringen" buttons to leave this step. */
  onNext: () => void
  /** When set, use this data directly instead of fetching from the session API. */
  wordResultOverride?: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown }
}
|
||||
|
||||
/**
 * Editor-side view of one grid cell, built from a GridCell when a word result
 * is applied (field names are camelCased copies of the GridCell fields).
 */
interface EditableCell {
  /** Copied from GridCell.cell_id; also used to build the input element id `cell-<cellId>`. */
  cellId: string
  /** Cell text as loaded; shown unless an edit for this cell is pending. */
  text: string
  /** Text as loaded, kept to decide whether a pending edit really differs. */
  originalText: string
  /** Cell bounding box; the values are applied as CSS percentages of the overlay container. */
  bboxPct: { x: number; y: number; w: number; h: number }
  /** Copied from GridCell.col_type; shown in the input tooltip. */
  colType: string
  /** Row position, primary sort key for Tab navigation order. */
  rowIndex: number
  /** Column position, secondary sort key for Tab navigation order. */
  colIndex: number
}
|
||||
|
||||
/** One entry of the undo/redo history: the text of a cell before and after a single edit. */
type UndoAction = { cellId: string; oldText: string; newText: string }
|
||||
|
||||
export function OverlayReconstruction({ sessionId, onNext, wordResultOverride }: OverlayReconstructionProps) {
|
||||
const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
|
||||
const [error, setError] = useState('')
|
||||
const [cells, setCells] = useState<EditableCell[]>([])
|
||||
const [gridCells, setGridCells] = useState<GridCell[]>([])
|
||||
const [editedTexts, setEditedTexts] = useState<Map<string, string>>(new Map())
|
||||
|
||||
// Undo/Redo
|
||||
const [undoStack, setUndoStack] = useState<UndoAction[]>([])
|
||||
const [redoStack, setRedoStack] = useState<UndoAction[]>([])
|
||||
|
||||
// Overlay state
|
||||
const [rows, setRows] = useState<RowItem[]>([])
|
||||
const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
|
||||
const [fontScale, setFontScale] = useState(0.7)
|
||||
const [globalBold, setGlobalBold] = useState(false)
|
||||
const [imageRotation, setImageRotation] = useState<0 | 180>(0)
|
||||
const [textOpacity, setTextOpacity] = useState(100)
|
||||
const [textColor, setTextColor] = useState<'red' | 'blue' | 'black'>('red')
|
||||
const [positioningMode, setPositioningMode] = useState<'cluster' | 'slide'>('slide')
|
||||
const reconRef = useRef<HTMLDivElement>(null)
|
||||
const [reconWidth, setReconWidth] = useState(0)
|
||||
|
||||
// Pixel-based word positions (both algorithms run, toggle selects which to use)
|
||||
const overlayImageUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
const clusterPositions = usePixelWordPositions(
|
||||
overlayImageUrl,
|
||||
gridCells,
|
||||
status === 'ready',
|
||||
imageRotation,
|
||||
)
|
||||
const slidePositions = useSlideWordPositions(
|
||||
overlayImageUrl,
|
||||
gridCells,
|
||||
status === 'ready',
|
||||
imageRotation,
|
||||
)
|
||||
const cellWordPositions = positioningMode === 'slide' ? slidePositions : clusterPositions
|
||||
|
||||
// Track container width
|
||||
useEffect(() => {
|
||||
const el = reconRef.current
|
||||
if (!el) return
|
||||
const obs = new ResizeObserver(entries => {
|
||||
for (const entry of entries) setReconWidth(entry.contentRect.width)
|
||||
})
|
||||
obs.observe(el)
|
||||
return () => obs.disconnect()
|
||||
}, [status])
|
||||
|
||||
// Load session data
|
||||
useEffect(() => {
|
||||
if (wordResultOverride) {
|
||||
applyWordResult(wordResultOverride)
|
||||
return
|
||||
}
|
||||
if (!sessionId) return
|
||||
loadSessionData()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId, wordResultOverride])
|
||||
|
||||
/**
 * Load a word-recognition result into the editor: stores the raw grid cells,
 * derives the editable cell list, clears pending edits and undo/redo history,
 * records the image size when both dimensions are present, and marks the
 * component as ready.
 */
const applyWordResult = (wordResult: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown }) => {
  const sourceCells: GridCell[] = wordResult.cells || []
  const { image_width: width, image_height: height } = wordResult

  setGridCells(sourceCells)
  setCells(
    sourceCells.map((cell): EditableCell => ({
      cellId: cell.cell_id,
      text: cell.text,
      originalText: cell.text,
      bboxPct: cell.bbox_pct,
      colType: cell.col_type,
      rowIndex: cell.row_index,
      colIndex: cell.col_index,
    })),
  )

  // Fresh data invalidates every pending edit and the whole edit history.
  setEditedTexts(new Map())
  setUndoStack([])
  setRedoStack([])

  if (width && height) {
    setImageNaturalSize({ w: width, h: height })
  }

  setStatus('ready')
}
|
||||
|
||||
const loadSessionData = async () => {
|
||||
if (!sessionId) return
|
||||
setStatus('loading')
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`)
|
||||
const data = await res.json()
|
||||
|
||||
const wordResult: GridResult | undefined = data.word_result
|
||||
if (!wordResult) {
|
||||
setError('Keine Worterkennungsdaten gefunden. Bitte zuerst den Woerter-Schritt abschliessen.')
|
||||
setStatus('error')
|
||||
return
|
||||
}
|
||||
|
||||
applyWordResult(wordResult as unknown as { cells: GridCell[]; image_width: number; image_height: number })
|
||||
|
||||
// Load rows
|
||||
const rowResult: RowResult | undefined = data.row_result
|
||||
if (rowResult?.rows) setRows(rowResult.rows)
|
||||
} catch (e: unknown) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
setStatus('error')
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Record a user edit for one cell.
 *
 * Pushes an undo entry (previous text -> new text), clears the redo history
 * and stores the new text as the pending edit for `cellId`.
 *
 * @param cellId  id of the edited cell
 * @param newText text currently typed into the cell's input
 */
const handleTextChange = useCallback((cellId: string, newText: string) => {
  // Text shown before this edit: the pending edit if any, else the loaded cell text.
  const prevText = editedTexts.get(cellId) ?? cells.find(c => c.cellId === cellId)?.text ?? ''

  // The three state updates are issued side by side instead of from inside the
  // setEditedTexts updater: updater functions must be pure, and React may call
  // them twice (e.g. Strict Mode), which would push the undo entry twice.
  setUndoStack(stack => [...stack, { cellId, oldText: prevText, newText }])
  setRedoStack([])
  setEditedTexts(prev => {
    const next = new Map(prev)
    next.set(cellId, newText)
    return next
  })
}, [cells, editedTexts])
|
||||
|
||||
/**
 * Revert the most recent edit: pops the last undo entry, moves it onto the
 * redo stack and restores the cell's previous text. No-op when the undo
 * history is empty.
 */
const undo = useCallback(() => {
  if (undoStack.length === 0) return
  const action = undoStack[undoStack.length - 1]

  // Each setter is called exactly once from the event handler. Nesting the
  // redo/text updates inside the setUndoStack updater made that updater impure,
  // so a double invocation (Strict Mode) pushed the action onto redo twice.
  setUndoStack(undoStack.slice(0, -1))
  setRedoStack(rs => [...rs, action])
  setEditedTexts(prev => {
    const next = new Map(prev)
    next.set(action.cellId, action.oldText)
    return next
  })
}, [undoStack])
|
||||
|
||||
/**
 * Re-apply the most recently undone edit: pops the last redo entry, moves it
 * back onto the undo stack and restores the edited text. No-op when the redo
 * history is empty.
 */
const redo = useCallback(() => {
  if (redoStack.length === 0) return
  const action = redoStack[redoStack.length - 1]

  // Each setter is called exactly once from the event handler. Nesting the
  // undo/text updates inside the setRedoStack updater made that updater impure,
  // so a double invocation (Strict Mode) pushed the action onto undo twice.
  setRedoStack(redoStack.slice(0, -1))
  setUndoStack(us => [...us, action])
  setEditedTexts(prev => {
    const next = new Map(prev)
    next.set(action.cellId, action.newText)
    return next
  })
}, [redoStack])
|
||||
|
||||
/**
 * Discard the pending edit of one cell so its loaded text is shown again.
 * The undo/redo history is left as it is.
 */
const resetCell = useCallback((cellId: string) => {
  // Rebuild the map without the given cell; a new Map instance is always produced.
  setEditedTexts(current => new Map(Array.from(current.entries()).filter(([id]) => id !== cellId)))
}, [])
|
||||
|
||||
// Keyboard shortcuts: Ctrl/Cmd+Z = undo, Ctrl/Cmd+Shift+Z = redo.
// The listener is attached to the document and removed again on cleanup.
useEffect(() => {
  const handler = (e: KeyboardEvent) => {
    // While Shift is held the browser reports the key as 'Z', so the letter
    // is compared case-insensitively; a strict 'z' check never matched the
    // redo combination.
    if (!(e.metaKey || e.ctrlKey) || e.key.toLowerCase() !== 'z') return
    e.preventDefault()
    if (e.shiftKey) redo()
    else undo()
  }
  document.addEventListener('keydown', handler)
  return () => document.removeEventListener('keydown', handler)
}, [undo, redo])
|
||||
|
||||
const getDisplayText = useCallback((cell: EditableCell): string => {
|
||||
return editedTexts.get(cell.cellId) ?? cell.text
|
||||
}, [editedTexts])
|
||||
|
||||
const isEdited = useCallback((cell: EditableCell): boolean => {
|
||||
const edited = editedTexts.get(cell.cellId)
|
||||
return edited !== undefined && edited !== cell.originalText
|
||||
}, [editedTexts])
|
||||
|
||||
const changedCount = useMemo(() => {
|
||||
let count = 0
|
||||
for (const cell of cells) {
|
||||
if (isEdited(cell)) count++
|
||||
}
|
||||
return count
|
||||
}, [cells, isEdited])
|
||||
|
||||
// Tab navigation
|
||||
const sortedCellIds = useMemo(() => {
|
||||
return [...cells]
|
||||
.sort((a, b) => a.rowIndex !== b.rowIndex ? a.rowIndex - b.rowIndex : a.colIndex - b.colIndex)
|
||||
.map(c => c.cellId)
|
||||
}, [cells])
|
||||
|
||||
/**
 * Tab / Shift+Tab navigation between cell inputs in reading order
 * (row first, then column).
 *
 * Not every cell has an input element in the DOM (cells rendered as several
 * word spans, or skipped because they are empty), so the handler walks on in
 * the requested direction until it finds a cell whose `cell-<id>` element
 * exists. The default Tab behaviour is only suppressed when such a target is
 * found; otherwise the browser moves focus as usual, so the first/last cell
 * and cells next to input-less cells no longer trap keyboard focus.
 *
 * @param e      key event from a cell input
 * @param cellId id of the cell that currently has focus
 */
const handleKeyDown = useCallback((e: React.KeyboardEvent, cellId: string) => {
  if (e.key !== 'Tab') return

  const start = sortedCellIds.indexOf(cellId)
  if (start === -1) return

  const step = e.shiftKey ? -1 : 1
  for (let i = start + step; i >= 0 && i < sortedCellIds.length; i += step) {
    const target = document.getElementById(`cell-${sortedCellIds[i]}`)
    if (target) {
      e.preventDefault()
      target.focus()
      return
    }
  }
  // No focusable cell in that direction: let the browser handle Tab.
}, [sortedCellIds])
|
||||
|
||||
/**
 * Persist all edits that differ from the originally loaded text.
 *
 * Sets status to 'saving', POSTs the changed cells to the session's
 * reconstruction endpoint (skipped when nothing changed) and ends in 'saved'.
 * Any failure stores the message in `error` and sets status to 'error'.
 * Does nothing without a session id.
 */
const saveReconstruction = useCallback(async () => {
  if (!sessionId) return
  setStatus('saving')

  try {
    // Keep only edits that belong to a known cell and really change its text.
    const cellUpdates: { cell_id: string; text: string }[] = []
    editedTexts.forEach((text, cellId) => {
      const source = cells.find(c => c.cellId === cellId)
      if (source && text !== source.originalText) {
        cellUpdates.push({ cell_id: cellId, text })
      }
    })

    if (cellUpdates.length > 0) {
      const endpoint = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction`
      const res = await fetch(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ cells: cellUpdates }),
      })

      if (!res.ok) {
        // Prefer the server-provided detail; fall back to the bare status code.
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || `HTTP ${res.status}`)
      }
    }

    setStatus('saved')
  } catch (e: unknown) {
    setError(e instanceof Error ? e.message : String(e))
    setStatus('error')
  }
}, [sessionId, editedTexts, cells])
|
||||
|
||||
const dewarpedUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
|
||||
// Compute median cell height (in px) for consistent font sizing
|
||||
// Must be before early returns (Rules of Hooks)
|
||||
/**
 * Median rendered cell height in pixels, used as the base for font sizing.
 * Falls back to 40 when there are no cells or the container has no height yet.
 */
const medianCellHeightPx = useMemo(() => {
  // Rendered overlay height follows from the measured width and the image aspect ratio.
  const aspect = (imageNaturalSize?.h || 1) / (imageNaturalSize?.w || 1)
  const renderedHeight = reconWidth * aspect
  if (cells.length === 0 || renderedHeight === 0) return 40

  const sorted = cells
    .map(c => renderedHeight * (c.bboxPct.h / 100))
    .sort((a, b) => a - b)
  const half = Math.floor(sorted.length / 2)

  if (sorted.length % 2 === 1) return sorted[half]
  return (sorted[half - 1] + sorted[half]) / 2
}, [cells, reconWidth, imageNaturalSize])
|
||||
|
||||
if (!sessionId) {
|
||||
return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
|
||||
}
|
||||
|
||||
if (status === 'loading') {
|
||||
return (
|
||||
<div className="flex items-center gap-3 justify-center py-12">
|
||||
<div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
|
||||
<span className="text-gray-500">Overlay-Daten werden geladen...</span>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (status === 'error') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">⚠️</div>
|
||||
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
|
||||
<div className="flex gap-3">
|
||||
<button onClick={() => { setError(''); loadSessionData() }}
|
||||
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
|
||||
Erneut versuchen
|
||||
</button>
|
||||
<button onClick={onNext}
|
||||
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
|
||||
Ueberspringen →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (status === 'saved') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">✅</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Overlay gespeichert</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
|
||||
{changedCount > 0 ? `${changedCount} Zellen wurden aktualisiert.` : 'Keine Aenderungen vorgenommen.'}
|
||||
</p>
|
||||
<button onClick={onNext}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||
Fertig
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const imgW = imageNaturalSize?.w || 1
|
||||
const imgH = imageNaturalSize?.h || 1
|
||||
const containerH = reconWidth * (imgH / imgW)
|
||||
|
||||
return (
|
||||
<div className="space-y-3">
|
||||
{/* Toolbar */}
|
||||
<div className="flex items-center justify-between bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Overlay-Rekonstruktion
|
||||
</h3>
|
||||
<span className="text-xs text-gray-400">
|
||||
{cells.length} Zellen · {changedCount} geaendert
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{/* Undo/Redo */}
|
||||
<button
|
||||
onClick={undo}
|
||||
disabled={undoStack.length === 0}
|
||||
className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700 disabled:opacity-30"
|
||||
title="Rueckgaengig (Ctrl+Z)"
|
||||
>
|
||||
↩
|
||||
</button>
|
||||
<button
|
||||
onClick={redo}
|
||||
disabled={redoStack.length === 0}
|
||||
className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700 disabled:opacity-30"
|
||||
title="Wiederholen (Ctrl+Shift+Z)"
|
||||
>
|
||||
↪
|
||||
</button>
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Font scale */}
|
||||
<label className="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
|
||||
Schrift
|
||||
<input
|
||||
type="range" min={30} max={120} value={Math.round(fontScale * 100)}
|
||||
onChange={e => setFontScale(Number(e.target.value) / 100)}
|
||||
className="w-20 h-1 accent-teal-600"
|
||||
/>
|
||||
<span className="w-8 text-right font-mono">{Math.round(fontScale * 100)}%</span>
|
||||
</label>
|
||||
<button
|
||||
onClick={() => setGlobalBold(b => !b)}
|
||||
className={`px-2 py-1 text-xs rounded border transition-colors font-bold ${
|
||||
globalBold
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
>
|
||||
B
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setImageRotation(r => r === 0 ? 180 : 0)}
|
||||
className={`px-2 py-1 text-xs rounded border transition-colors ${
|
||||
imageRotation === 180
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
title="Bild 180° drehen"
|
||||
>
|
||||
180°
|
||||
</button>
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Positioning mode toggle */}
|
||||
<button
|
||||
onClick={() => setPositioningMode(m => m === 'slide' ? 'cluster' : 'slide')}
|
||||
className={`px-2 py-1 text-xs rounded border transition-colors ${
|
||||
positioningMode === 'slide'
|
||||
? 'bg-orange-500 text-white border-orange-500'
|
||||
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
title={positioningMode === 'slide'
|
||||
? 'Slide-Modus: Woerter von links nach rechts schieben (klick fuer Cluster-Modus)'
|
||||
: 'Cluster-Modus: Woerter an Pixel-Cluster zuordnen (klick fuer Slide-Modus)'}
|
||||
>
|
||||
{positioningMode === 'slide' ? 'Slide' : 'Cluster'}
|
||||
</button>
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Text color */}
|
||||
{(['red', 'blue', 'black'] as const).map(c => (
|
||||
<button
|
||||
key={c}
|
||||
onClick={() => setTextColor(c)}
|
||||
className={`w-5 h-5 rounded-full border-2 transition-colors ${
|
||||
textColor === c ? 'border-teal-500 ring-1 ring-teal-300' : 'border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
style={{ backgroundColor: c === 'black' ? '#1a1a1a' : c }}
|
||||
title={`Textfarbe: ${c}`}
|
||||
/>
|
||||
))}
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Text opacity */}
|
||||
<label className="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
|
||||
Text
|
||||
<input
|
||||
type="range" min={0} max={100} value={textOpacity}
|
||||
onChange={e => setTextOpacity(Number(e.target.value))}
|
||||
className="w-16 h-1 accent-teal-600"
|
||||
/>
|
||||
<span className="w-8 text-right font-mono">{textOpacity}%</span>
|
||||
</label>
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
<button
|
||||
onClick={saveReconstruction}
|
||||
disabled={status === 'saving'}
|
||||
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 transition-colors font-medium"
|
||||
>
|
||||
Speichern
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* True overlay: text layer on top of original image */}
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-gray-50 dark:bg-gray-900">
|
||||
<div
|
||||
ref={reconRef}
|
||||
className="relative"
|
||||
style={{ aspectRatio: `${imgW} / ${imgH}` }}
|
||||
>
|
||||
{/* Background: original image */}
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Original"
|
||||
className="absolute inset-0 w-full h-full object-contain"
|
||||
onLoad={(e) => {
|
||||
const img = e.target as HTMLImageElement
|
||||
setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight })
|
||||
}}
|
||||
/>
|
||||
|
||||
{/* Text overlay layer */}
|
||||
<div
|
||||
className="absolute inset-0"
|
||||
style={{ opacity: textOpacity / 100 }}
|
||||
>
|
||||
{/* Row lines */}
|
||||
{rows.map((row, i) => (
|
||||
<div
|
||||
key={`row-${i}`}
|
||||
className="absolute left-0 right-0 border-t border-cyan-400/40"
|
||||
style={{ top: `${(row.y / imgH) * 100}%` }}
|
||||
/>
|
||||
))}
|
||||
|
||||
{/* Pixel-positioned words / editable inputs */}
|
||||
{cells.map((cell) => {
|
||||
const displayText = getDisplayText(cell)
|
||||
const edited = isEdited(cell)
|
||||
const wordPos = cellWordPositions.get(cell.cellId)
|
||||
const bboxPct = cell.bboxPct
|
||||
const colorValue = textColor === 'black' ? '#1a1a1a' : textColor
|
||||
|
||||
// Pixel-analysed: render word-groups at detected positions
|
||||
if (wordPos && wordPos.length > 0) {
|
||||
return wordPos.map((wp, i) => {
|
||||
const autoFontPx = medianCellHeightPx * wp.fontRatio * fontScale
|
||||
const fs = Math.max(6, autoFontPx)
|
||||
|
||||
if (wordPos.length > 1) {
|
||||
return (
|
||||
<span
|
||||
key={`${cell.cellId}_wp_${i}`}
|
||||
className="absolute leading-none pointer-events-none select-none"
|
||||
style={{
|
||||
left: `${wp.xPct}%`,
|
||||
top: `${wp.yPct}%`,
|
||||
width: `${wp.wPct}%`,
|
||||
height: `${wp.hPct}%`,
|
||||
fontSize: `${fs}px`,
|
||||
fontWeight: globalBold ? 'bold' : 'normal',
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
whiteSpace: 'nowrap',
|
||||
overflow: 'visible',
|
||||
color: colorValue,
|
||||
}}
|
||||
>
|
||||
{wp.text}
|
||||
</span>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div key={`${cell.cellId}_wp_${i}`} className="absolute group" style={{
|
||||
left: `${wp.xPct}%`,
|
||||
top: `${wp.yPct}%`,
|
||||
width: `${wp.wPct}%`,
|
||||
height: `${wp.hPct}%`,
|
||||
}}>
|
||||
<input
|
||||
id={`cell-${cell.cellId}`}
|
||||
type="text"
|
||||
value={displayText}
|
||||
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
|
||||
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
|
||||
className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
|
||||
edited ? 'bg-green-50/30' : ''
|
||||
}`}
|
||||
style={{
|
||||
fontSize: `${fs}px`,
|
||||
fontWeight: globalBold ? 'bold' : 'normal',
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
lineHeight: '1',
|
||||
color: colorValue,
|
||||
}}
|
||||
title={`${cell.cellId} (${cell.colType})`}
|
||||
/>
|
||||
{edited && (
|
||||
<button
|
||||
onClick={() => resetCell(cell.cellId)}
|
||||
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||
title="Zuruecksetzen"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// Fallback: no pixel data — single input at cell bbox
|
||||
if (!cell.text) return null
|
||||
|
||||
const fontSize = Math.max(6, medianCellHeightPx * fontScale)
|
||||
return (
|
||||
<div key={cell.cellId} className="absolute group" style={{
|
||||
left: `${bboxPct.x}%`,
|
||||
top: `${bboxPct.y}%`,
|
||||
width: `${bboxPct.w}%`,
|
||||
height: `${bboxPct.h}%`,
|
||||
}}>
|
||||
<input
|
||||
id={`cell-${cell.cellId}`}
|
||||
type="text"
|
||||
value={displayText}
|
||||
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
|
||||
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
|
||||
className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
|
||||
edited ? 'bg-green-50/30' : ''
|
||||
}`}
|
||||
style={{
|
||||
fontSize: `${fontSize}px`,
|
||||
fontWeight: globalBold ? 'bold' : 'normal',
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
lineHeight: '1',
|
||||
color: colorValue,
|
||||
}}
|
||||
title={`${cell.cellId} (${cell.colType})`}
|
||||
/>
|
||||
{edited && (
|
||||
<button
|
||||
onClick={() => resetCell(cell.cellId)}
|
||||
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||
title="Zuruecksetzen"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Bottom action */}
|
||||
<div className="flex justify-end">
|
||||
<button
|
||||
onClick={() => {
|
||||
if (changedCount > 0) {
|
||||
saveReconstruction()
|
||||
} else {
|
||||
onNext()
|
||||
}
|
||||
}}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium text-sm"
|
||||
>
|
||||
{changedCount > 0 ? 'Speichern & Fertig' : 'Fertig'}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
153
admin-lehrer/components/ocr-overlay/PaddleDirectStep.tsx
Normal file
153
admin-lehrer/components/ocr-overlay/PaddleDirectStep.tsx
Normal file
@@ -0,0 +1,153 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { OverlayReconstruction } from './OverlayReconstruction'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
type Phase = 'idle' | 'running' | 'overlay'
|
||||
|
||||
/**
 * Props of PaddleDirectStep. Everything except `sessionId` and `onNext` is
 * optional and has a default in the component signature.
 */
interface PaddleDirectStepProps {
  /** Session to run OCR on; when null only an "upload an image first" hint is rendered. */
  sessionId: string | null
  /** Passed through to the overlay view as its completion callback. */
  onNext: () => void
  /** Backend endpoint suffix, default: 'paddle-direct' */
  endpoint?: string
  /** Title shown in idle state */
  title?: string
  /** Description shown in idle state */
  description?: string
  /** Icon shown in idle state */
  icon?: string
  /** Button label */
  buttonLabel?: string
  /** Running label */
  runningLabel?: string
  /** OCR engine key to check for auto-detect */
  engineKey?: string
}
|
||||
|
||||
/**
 * Single-button OCR step: starts one backend OCR run for the session and then
 * shows the result in the overlay editor.
 *
 * Phases:
 *  - 'idle'    : title, description, optional error and the start button
 *  - 'running' : spinner while the POST request is in flight
 *  - 'overlay' : optional run statistics plus OverlayReconstruction
 *
 * On mount (and whenever `sessionId` or `engineKey` changes) the session is
 * probed; if its stored word result was produced by `engineKey`, the
 * component jumps straight to the overlay phase.
 */
export function PaddleDirectStep({
  sessionId,
  onNext,
  endpoint = 'paddle-direct',
  title = 'PP-OCRv5 Direct',
  description = 'PP-OCRv5 (lokal via RapidOCR) erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.',
  icon = '⚡',
  buttonLabel = 'PP-OCRv5 starten',
  runningLabel = 'PP-OCRv5 laeuft...',
  engineKey = 'paddle_direct',
}: PaddleDirectStepProps) {
  const [phase, setPhase] = useState<Phase>('idle')
  const [error, setError] = useState<string | null>(null)
  const [stats, setStats] = useState<{ cells: number; rows: number; duration: number } | null>(null)

  // Auto-detect: if session already has matching word_result → show overlay
  useEffect(() => {
    if (!sessionId) return
    let cancelled = false
    ;(async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok || cancelled) return
        const data = await res.json()
        // Re-check after the second await: the effect may have been cleaned up
        // (session or engine changed, component unmounted) while the body was
        // being read, and a stale answer must not switch the phase.
        if (cancelled) return
        if (data.word_result?.ocr_engine === engineKey) {
          setPhase('overlay')
        }
      } catch {
        // Best-effort probe: on any failure simply stay in the current phase.
      }
    })()
    return () => { cancelled = true }
  }, [sessionId, engineKey])

  // Start the OCR run; on success store the run statistics and show the overlay,
  // on failure return to the idle phase with an error message.
  const runOcr = useCallback(async () => {
    if (!sessionId) return
    setPhase('running')
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`, {
        method: 'POST',
      })
      if (!res.ok) {
        const data = await res.json().catch(() => ({}))
        // Only a non-empty string detail is usable as a message; structured
        // payloads would otherwise be shown as "[object Object]".
        const detail = typeof data.detail === 'string' && data.detail ? data.detail : `HTTP ${res.status}`
        throw new Error(detail)
      }
      const data = await res.json()
      setStats({
        cells: data.summary?.total_cells || 0,
        rows: data.grid_shape?.rows || 0,
        duration: data.duration_seconds || 0,
      })
      setPhase('overlay')
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      setPhase('idle')
    }
  }, [sessionId, endpoint])

  if (!sessionId) {
    return (
      <div className="text-sm text-gray-400 py-8 text-center">
        Bitte zuerst ein Bild hochladen.
      </div>
    )
  }

  if (phase === 'overlay') {
    return (
      <div className="space-y-3">
        {stats && (
          <div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
            <span>{stats.cells} Woerter erkannt</span>
            <span>{stats.rows} Zeilen</span>
            <span>{stats.duration.toFixed(1)}s</span>
          </div>
        )}
        <OverlayReconstruction sessionId={sessionId} onNext={onNext} />
      </div>
    )
  }

  return (
    <div className="flex flex-col items-center justify-center py-16 space-y-6">
      {phase === 'running' ? (
        <>
          <div className="w-10 h-10 border-4 border-teal-200 dark:border-teal-800 border-t-teal-600 dark:border-t-teal-400 rounded-full animate-spin" />
          <div className="text-center space-y-1">
            <p className="text-sm font-medium text-gray-700 dark:text-gray-300">
              {runningLabel}
            </p>
            <p className="text-xs text-gray-400">
              Bild wird analysiert (ca. 5-30s)
            </p>
          </div>
        </>
      ) : (
        <>
          <div className="text-center space-y-2">
            <div className="text-4xl">{icon}</div>
            <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300">
              {title}
            </h3>
            <p className="text-sm text-gray-500 dark:text-gray-400 max-w-md">
              {description}
            </p>
          </div>

          {error && (
            <div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 px-4 py-2 rounded-lg">
              {error}
            </div>
          )}

          <button
            onClick={runOcr}
            className="px-6 py-2.5 bg-teal-600 text-white text-sm font-medium rounded-lg hover:bg-teal-700 transition-colors"
          >
            {buttonLabel}
          </button>
        </>
      )}
    </div>
  )
}
|
||||
253
admin-lehrer/components/ocr-overlay/usePixelWordPositions.ts
Normal file
253
admin-lehrer/components/ocr-overlay/usePixelWordPositions.ts
Normal file
@@ -0,0 +1,253 @@
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
|
||||
|
||||
/**
 * Placement of one text group on the page image.
 *
 * All geometry is expressed in percent of the full image, in the same
 * coordinate space as `GridCell.bbox_pct`.
 */
export interface WordPosition {
  /** Left edge, in percent of the image width. */
  xPct: number
  /** Top edge, in percent of the image height (copied from the cell's bbox). */
  yPct: number
  /** Width, in percent of the image width. */
  wPct: number
  /** Height, in percent of the image height (copied from the cell's bbox). */
  hPct: number
  /** Text to render at this position. */
  text: string
  /** Estimated font size as a fraction of the cell height, capped at 1.0. */
  fontRatio: number
}
|
||||
|
||||
/**
 * Analyse dark-pixel clusters on an image to determine the horizontal
 * position and an automatic font size for the word groups of each cell.
 *
 * When rotation=180, the image is drawn rotated by 180° before pixel analysis.
 * Cell coordinates are transformed into the rotated space for reading, and
 * cluster positions are mirrored back into the original coordinate system.
 *
 * The image is loaded asynchronously. If the inputs change (or the component
 * unmounts) before loading finishes, the pending load is cancelled so that an
 * outdated image can never overwrite the result of a newer request.
 *
 * @param imageUrl  URL of the page image to analyse.
 * @param cells     Grid cells; cells without `bbox_pct` or `text` are ignored.
 * @param active    Analysis only runs while this is true.
 * @param rotation  0 (default) or 180.
 * @returns Map<cell_id, WordPosition[]>; empty until the first analysis completes.
 */
export function usePixelWordPositions(
  imageUrl: string,
  cells: GridCell[],
  active: boolean,
  rotation: 0 | 180 = 0,
): Map<string, WordPosition[]> {
  const [cellWordPositions, setCellWordPositions] = useState<Map<string, WordPosition[]>>(new Map())

  useEffect(() => {
    if (!active || cells.length === 0 || !imageUrl) return

    // Flipped by the cleanup function; guards against a late onload from a
    // superseded effect run publishing stale positions.
    let cancelled = false

    const img = new Image()
    img.crossOrigin = 'anonymous'
    img.onload = () => {
      if (cancelled) return

      const imgW = img.naturalWidth
      const imgH = img.naturalHeight

      const canvas = document.createElement('canvas')
      canvas.width = imgW
      canvas.height = imgH
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      if (rotation === 180) {
        ctx.translate(imgW, imgH)
        ctx.rotate(Math.PI)
        ctx.drawImage(img, 0, 0)
        // Reset the transform so later canvas calls use plain pixel coordinates.
        ctx.setTransform(1, 0, 0, 1, 0, 0)
      } else {
        ctx.drawImage(img, 0, 0)
      }

      // Reference font used only for measuring text widths.
      const refFontSize = 40
      const fontFam = "'Liberation Sans', Arial, sans-serif"
      ctx.font = `${refFontSize}px ${fontFam}`

      const positions = new Map<string, WordPosition[]>()

      for (const cell of cells) {
        if (!cell.bbox_pct || !cell.text) continue

        // Runs of three or more whitespace characters separate word groups.
        const rawGroups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)

        // Merge single-char symbol groups (OCR artifacts from box borders like "|", ">")
        // with their neighbour to avoid polluting the cluster-to-group matching
        const groups: string[] = []
        for (let gi = 0; gi < rawGroups.length; gi++) {
          const g = rawGroups[gi]
          const isArtifact = g.length <= 2 && !/[a-zA-Z0-9\u00C0-\u024F]/.test(g)
          if (isArtifact) {
            if (gi + 1 < rawGroups.length) {
              // merge with next group
              rawGroups[gi + 1] = g + ' ' + rawGroups[gi + 1]
            } else if (groups.length > 0) {
              // last group — merge with previous
              groups[groups.length - 1] += ' ' + g
            } else {
              groups.push(g)
            }
          } else {
            groups.push(g)
          }
        }

        // Cell rectangle in canvas pixels; mirrored when the canvas is rotated.
        let cx: number, cy: number
        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)

        if (rotation === 180) {
          cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
          cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
        } else {
          cx = Math.round(cell.bbox_pct.x / 100 * imgW)
          cy = Math.round(cell.bbox_pct.y / 100 * imgH)
        }
        if (cw <= 0 || ch <= 0) continue
        if (cx < 0) cx = 0
        if (cy < 0) cy = 0
        if (cx + cw > imgW || cy + ch > imgH) continue

        const imageData = ctx.getImageData(cx, cy, cw, ch)

        // Vertical projection: number of dark pixels (luminance < 128) per column.
        const proj = new Float32Array(cw)
        for (let y = 0; y < ch; y++) {
          for (let x = 0; x < cw; x++) {
            const idx = (y * cw + x) * 4
            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
            if (lum < 128) proj[x]++
          }
        }

        // A column counts as ink when at least 3% of its pixels (min. 1) are dark.
        const threshold = Math.max(1, ch * 0.03)
        // Gaps up to this width (2% of the cell, min. 5px) stay inside one cluster.
        const minGap = Math.max(5, Math.round(cw * 0.02))
        let clusters: { start: number; end: number }[] = []
        let inCluster = false
        let clStart = 0
        let gap = 0

        for (let x = 0; x < cw; x++) {
          if (proj[x] >= threshold) {
            if (!inCluster) { clStart = x; inCluster = true }
            gap = 0
          } else if (inCluster) {
            gap++
            if (gap > minGap) {
              // x - gap is the last ink column before the gap started.
              clusters.push({ start: clStart, end: x - gap })
              inCluster = false
              gap = 0
            }
          }
        }
        if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })

        if (clusters.length === 0) continue

        // Filter out very narrow clusters (likely box borders / vertical lines)
        const minClusterW = Math.max(3, Math.round(cw * 0.005))
        clusters = clusters.filter(c => (c.end - c.start + 1) > minClusterW)
        if (clusters.length === 0) continue

        if (rotation === 180) {
          // Mirror back into the unrotated cell and restore left-to-right order.
          clusters = clusters.map(c => ({
            start: cw - 1 - c.end,
            end: cw - 1 - c.start,
          })).reverse()
        }

        const wordPos: WordPosition[] = []

        // Match groups to clusters using width-proportional assignment.
        // Each group is assigned to the cluster whose width best matches
        // the group's expected pixel width (text measurement).
        if (groups.length > 1 && clusters.length >= groups.length) {
          // Measure each group's expected width
          const groupWidths = groups.map(g => ctx.measureText(g).width)

          // Greedy assignment: for each group (in order), find the best
          // unassigned cluster by width ratio consistency
          const totalMeasured = groupWidths.reduce((a, b) => a + b, 0)
          const totalClusterW = clusters.reduce((a, c) => a + (c.end - c.start + 1), 0)
          const refScale = totalClusterW / totalMeasured
          const used = new Set<number>()

          const assignments: number[] = []
          for (let gi = 0; gi < groups.length; gi++) {
            const expectedW = groupWidths[gi] * refScale
            let bestIdx = -1
            let bestDiff = Infinity
            for (let ci = 0; ci < clusters.length; ci++) {
              if (used.has(ci)) continue
              const clW = clusters[ci].end - clusters[ci].start + 1
              const diff = Math.abs(clW - expectedW)
              if (diff < bestDiff) {
                bestDiff = diff
                bestIdx = ci
              }
            }
            used.add(bestIdx)
            assignments.push(bestIdx)
          }

          // Sort assignments to maintain left-to-right order
          const sortedPairs = assignments
            .map((ci, gi) => ({ ci, gi }))
            .sort((a, b) => clusters[a.ci].start - clusters[b.ci].start)

          for (const { ci, gi } of sortedPairs) {
            const cl = clusters[ci]
            const clusterW = cl.end - cl.start + 1
            // Scale the reference font so the measured text spans the cluster.
            const autoFontPx = refFontSize * (clusterW / groupWidths[gi])
            const fontRatio = Math.min(autoFontPx / ch, 1.0)
            wordPos.push({
              xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
              wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
              yPct: cell.bbox_pct.y,
              hPct: cell.bbox_pct.h,
              text: groups[gi],
              fontRatio,
            })
          }
        } else {
          // Single group OR not enough clusters:
          // use the WIDEST cluster (not first-to-last span which pulls in
          // stray pixels from adjacent page areas like box borders)
          const widest = clusters.reduce((best, c) =>
            (c.end - c.start) > (best.end - best.start) ? c : best, clusters[0])
          const clusterW = widest.end - widest.start + 1
          const measured = ctx.measureText(cell.text.trim())
          const autoFontPx = refFontSize * (clusterW / measured.width)
          const fontRatio = Math.min(autoFontPx / ch, 1.0)
          wordPos.push({
            xPct: cell.bbox_pct.x + (widest.start / cw) * cell.bbox_pct.w,
            wPct: ((widest.end - widest.start + 1) / cw) * cell.bbox_pct.w,
            yPct: cell.bbox_pct.y,
            hPct: cell.bbox_pct.h,
            text: cell.text.trim(),
            fontRatio,
          })
        }

        positions.set(cell.cell_id, wordPos)
      }

      // Normalise: find the most common fontRatio (mode) and apply it to all
      const allRatios: number[] = []
      for (const wps of positions.values()) {
        for (const wp of wps) allRatios.push(wp.fontRatio)
      }
      if (allRatios.length > 0) {
        // Ratios are bucketed in steps of 0.02 before counting.
        const buckets = new Map<number, number>()
        for (const r of allRatios) {
          const key = Math.round(r * 50) / 50
          buckets.set(key, (buckets.get(key) || 0) + 1)
        }
        let modeRatio = allRatios[0]
        let modeCount = 0
        for (const [ratio, count] of buckets) {
          if (count > modeCount) { modeRatio = ratio; modeCount = count }
        }
        for (const wps of positions.values()) {
          for (const wp of wps) wp.fontRatio = modeRatio
        }
      }

      setCellWordPositions(positions)
    }
    img.src = imageUrl

    return () => {
      // Inputs changed or the component unmounted: drop the pending result.
      cancelled = true
      img.onload = null
    }
  }, [active, cells, imageUrl, rotation])

  return cellWordPositions
}
|
||||
231
admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts
Normal file
231
admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts
Normal file
@@ -0,0 +1,231 @@
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
|
||||
|
||||
/**
 * Placement of a single word on the page image.
 *
 * All geometry is expressed in percent of the full image dimensions.
 */
export interface WordPosition {
  /** Left edge, in percent of the image width. */
  xPct: number
  /** Top edge, in percent of the image height. */
  yPct: number
  /** Width, in percent of the image width. */
  wPct: number
  /** Height, in percent of the image height. */
  hPct: number
  /** Text to render at this position. */
  text: string
  /** Font size ratio; the slide hook in this file always writes 1.0. */
  fontRatio: number
}
|
||||
|
||||
/**
 * Word positioning from OCR word bounding boxes, with a pixel-based
 * "slide from left" fallback.
 *
 * Word-box path (taken when at least one cell has word_boxes):
 *   - For a cell with non-empty word boxes, every box becomes one
 *     WordPosition: its pixel rectangle is converted to percent of the image
 *     and its own `box.text` is used as the text. Boxes are sorted
 *     left-to-right. `rotation` is not applied on this path.
 *   - For a cell without usable boxes, the whitespace-separated tokens of
 *     `cell.text` are spread evenly across the cell's bbox.
 *
 * Fallback path (no cell has word_boxes): the cell is reduced to a per-column
 * ink mask, and each token of `cell.text` slides from the current cursor to
 * the right until the window it would occupy contains enough ink columns.
 *
 * The image is loaded asynchronously. If the inputs change (or the component
 * unmounts) before loading finishes, the pending load is cancelled so that an
 * outdated image can never overwrite the result of a newer request.
 *
 * @param imageUrl  URL of the page image.
 * @param cells     Grid cells; cells without `bbox_pct` or `text` are ignored.
 * @param active    Positions are only computed while this is true.
 * @param rotation  0 (default) or 180; used by the pixel fallback only.
 * @returns Map<cell_id, WordPosition[]>; empty until the first run completes.
 */
export function useSlideWordPositions(
  imageUrl: string,
  cells: GridCell[],
  active: boolean,
  rotation: 0 | 180 = 0,
): Map<string, WordPosition[]> {
  const [result, setResult] = useState<Map<string, WordPosition[]>>(new Map())

  useEffect(() => {
    if (!active || cells.length === 0 || !imageUrl) return

    // Flipped by the cleanup function; guards against a late onload from a
    // superseded effect run publishing stale positions.
    let cancelled = false

    const img = new Image()
    img.crossOrigin = 'anonymous'
    img.onload = () => {
      if (cancelled) return

      const imgW = img.naturalWidth
      const imgH = img.naturalHeight

      const hasWordBoxes = cells.some(c => c.word_boxes && c.word_boxes.length > 0)

      if (hasWordBoxes) {
        // --- WORD-BOX PATH: use OCR positions directly ---
        // Each word_box already carries pixel coordinates from OCR.
        // Use them as-is — no fuzzy matching needed.
        const positions = new Map<string, WordPosition[]>()

        for (const cell of cells) {
          if (!cell.bbox_pct || !cell.text) continue

          const boxes = (cell.word_boxes || [])
            .filter(wb => wb.text.trim())
            .sort((a, b) => a.left - b.left)

          if (boxes.length === 0) {
            // No word_boxes — spread tokens evenly across cell
            const tokens = cell.text.split(/\s+/).filter(Boolean)
            if (tokens.length === 0) continue
            const fallbackW = cell.bbox_pct.w / tokens.length
            const wordPos = tokens.map((t, i) => ({
              xPct: cell.bbox_pct.x + i * fallbackW,
              wPct: fallbackW,
              yPct: cell.bbox_pct.y,
              hPct: cell.bbox_pct.h,
              text: t,
              fontRatio: 1.0,
            }))
            positions.set(cell.cell_id, wordPos)
            continue
          }

          // Use each word_box directly with its OCR coordinates
          const wordPos: WordPosition[] = boxes.map(box => ({
            xPct: (box.left / imgW) * 100,
            wPct: (box.width / imgW) * 100,
            yPct: (box.top / imgH) * 100,
            hPct: (box.height / imgH) * 100,
            text: box.text,
            fontRatio: 1.0,
          }))

          if (wordPos.length > 0) {
            positions.set(cell.cell_id, wordPos)
          }
        }

        setResult(positions)
        return
      }

      // --- FALLBACK: pixel-projection slide (no word_boxes) ---
      const canvas = document.createElement('canvas')
      canvas.width = imgW
      canvas.height = imgH
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      if (rotation === 180) {
        ctx.translate(imgW, imgH)
        ctx.rotate(Math.PI)
        ctx.drawImage(img, 0, 0)
        // Reset the transform so later canvas calls use plain pixel coordinates.
        ctx.setTransform(1, 0, 0, 1, 0, 0)
      } else {
        ctx.drawImage(img, 0, 0)
      }

      // Reference font used only for measuring text widths.
      const refFontSize = 40
      const fontFam = "'Liberation Sans', Arial, sans-serif"
      ctx.font = `${refFontSize}px ${fontFam}`

      // Median cell height (in image pixels) drives the assumed font size.
      const cellHeights = cells
        .filter(c => c.bbox_pct && c.bbox_pct.h > 0)
        .map(c => Math.round(c.bbox_pct.h / 100 * imgH))
        .sort((a, b) => a - b)
      const medianCh = cellHeights.length > 0
        ? cellHeights[Math.floor(cellHeights.length / 2)]
        : 30

      // Assumed font size is 70% of the median cell height; measureScale
      // converts widths measured at refFontSize into image pixels.
      const renderedFontImgPx = medianCh * 0.7
      const measureScale = renderedFontImgPx / refFontSize
      const spaceWidthPx = Math.max(2, Math.round(ctx.measureText(' ').width * measureScale))

      const positions = new Map<string, WordPosition[]>()

      for (const cell of cells) {
        if (!cell.bbox_pct || !cell.text) continue

        // Cell rectangle in canvas pixels; mirrored when the canvas is rotated.
        let cx: number, cy: number
        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)

        if (rotation === 180) {
          cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
          cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
        } else {
          cx = Math.round(cell.bbox_pct.x / 100 * imgW)
          cy = Math.round(cell.bbox_pct.y / 100 * imgH)
        }
        if (cw <= 0 || ch <= 0) continue
        if (cx < 0) cx = 0
        if (cy < 0) cy = 0
        if (cx + cw > imgW || cy + ch > imgH) continue

        const imageData = ctx.getImageData(cx, cy, cw, ch)

        // Vertical projection: number of dark pixels (luminance < 128) per column.
        const proj = new Float32Array(cw)
        for (let y = 0; y < ch; y++) {
          for (let x = 0; x < cw; x++) {
            const idx = (y * cw + x) * 4
            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
            if (lum < 128) proj[x]++
          }
        }

        // A column counts as ink when at least 3% of its pixels (min. 1) are dark.
        const threshold = Math.max(1, ch * 0.03)
        const ink = new Uint8Array(cw)
        for (let x = 0; x < cw; x++) {
          ink[x] = proj[x] >= threshold ? 1 : 0
        }
        if (rotation === 180) {
          // Mirror the mask back into the unrotated cell's left-to-right order.
          ink.reverse()
        }

        const tokens = cell.text.split(/\s+/).filter(Boolean)
        if (tokens.length === 0) continue

        const tokenWidthsPx = tokens.map(t =>
          Math.max(4, Math.round(ctx.measureText(t).width * measureScale))
        )

        const wordPos: WordPosition[] = []
        // Leftmost column (cell-relative) the next token may start at.
        let cursor = 0

        for (let ti = 0; ti < tokens.length; ti++) {
          const tokenW = tokenWidthsPx[ti]
          // The token's window must contain ink in at least 15% of its columns.
          const coverageNeeded = Math.max(1, Math.round(tokenW * 0.15))
          let bestX = cursor

          const searchLimit = Math.max(cursor, cw - tokenW)

          for (let x = cursor; x <= searchLimit; x++) {
            let inkCount = 0
            const spanEnd = Math.min(x + tokenW, cw)
            for (let dx = 0; dx < spanEnd - x; dx++) {
              inkCount += ink[x + dx]
            }
            if (inkCount >= coverageNeeded) {
              bestX = x
              break
            }
            // Every token but the first gives up after sliding more than 30%
            // of the cell width and stays at the cursor.
            if (x > cursor + cw * 0.3 && ti > 0) {
              bestX = cursor
              break
            }
          }

          // Keep the token inside the cell.
          if (bestX + tokenW > cw) {
            bestX = Math.max(0, cw - tokenW)
          }

          wordPos.push({
            xPct: cell.bbox_pct.x + (bestX / cw) * cell.bbox_pct.w,
            wPct: (tokenW / cw) * cell.bbox_pct.w,
            yPct: cell.bbox_pct.y,
            hPct: cell.bbox_pct.h,
            text: tokens[ti],
            fontRatio: 1.0,
          })

          cursor = bestX + tokenW + spaceWidthPx
        }

        if (wordPos.length > 0) {
          positions.set(cell.cell_id, wordPos)
        }
      }

      setResult(positions)
    }
    img.src = imageUrl

    return () => {
      // Inputs changed or the component unmounted: drop the pending result.
      cancelled = true
      img.onload = null
    }
  }, [active, cells, imageUrl, rotation])

  return result
}
|
||||
68
admin-lehrer/components/ocr-pipeline/BoxSessionTabs.tsx
Normal file
68
admin-lehrer/components/ocr-pipeline/BoxSessionTabs.tsx
Normal file
@@ -0,0 +1,68 @@
|
||||
'use client'
|
||||
|
||||
import type { SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props accepted by the BoxSessionTabs component. */
interface BoxSessionTabsProps {
  /** Session id behind the "Hauptseite" tab. */
  parentSessionId: string
  /** Id of the currently selected session; the matching tab is highlighted. */
  activeSessionId: string
  /** One tab is rendered per entry; with an empty list nothing is rendered. */
  subSessions: SubSession[]
  /** Called with the session id of the tab that was clicked. */
  onSessionChange: (sessionId: string) => void
}
|
||||
|
||||
/** Glyph shown in front of a sub-session tab label, keyed by display state. */
const STATUS_ICONS: Record<string, string> = {
  pending: '\u23F3', // hourglass
  processing: '\uD83D\uDD04', // arrows
  completed: '\u2713', // checkmark
}

/**
 * Pick the status glyph for a sub-session.
 *
 * - completed:  status is 'completed', or current_step is 9 or higher
 * - processing: current_step is greater than 1
 * - pending:    everything else (including a missing current_step)
 */
function getStatusIcon(sub: SubSession): string {
  const step = sub.current_step
  const atOrBeyondStepNine = Boolean(step && step >= 9)

  if (sub.status === 'completed' || atOrBeyondStepNine) {
    return STATUS_ICONS.completed
  }

  const pastFirstStep = Boolean(step && step > 1)
  return pastFirstStep ? STATUS_ICONS.processing : STATUS_ICONS.pending
}
|
||||
|
||||
/**
 * Tab strip for switching between the main session ("Hauptseite") and its
 * box sub-sessions. Renders nothing when there are no sub-sessions.
 */
export function BoxSessionTabs({ parentSessionId, subSessions, activeSessionId, onSessionChange }: BoxSessionTabsProps) {
  if (subSessions.length === 0) return null

  // Shared tab styling; only the selected/unselected part differs.
  const tabClass = (selected: boolean): string => {
    const tone = selected
      ? 'bg-white dark:bg-gray-700 text-teal-700 dark:text-teal-400 shadow-sm ring-1 ring-teal-300 dark:ring-teal-600'
      : 'text-gray-500 dark:text-gray-400 hover:bg-white/50 dark:hover:bg-gray-700/50'
    return `px-3 py-1.5 rounded-lg text-xs font-medium transition-colors ${tone}`
  }

  return (
    <div className="flex items-center gap-1.5 px-1 py-1.5 bg-gray-50 dark:bg-gray-800/50 rounded-xl border border-gray-200 dark:border-gray-700">
      {/* Main session tab */}
      <button
        onClick={() => onSessionChange(parentSessionId)}
        className={tabClass(activeSessionId === parentSessionId)}
      >
        Hauptseite
      </button>

      <div className="w-px h-5 bg-gray-200 dark:bg-gray-700" />

      {/* Sub-session tabs */}
      {subSessions.map((sub) => (
        <button
          key={sub.id}
          onClick={() => onSessionChange(sub.id)}
          className={tabClass(activeSessionId === sub.id)}
          title={sub.name}
        >
          <span className="mr-1">{getStatusIcon(sub)}</span>
          Box {sub.box_index + 1}
        </button>
      ))}
    </div>
  )
}
|
||||
320
admin-lehrer/components/ocr-pipeline/ColumnControls.tsx
Normal file
320
admin-lehrer/components/ocr-pipeline/ColumnControls.tsx
Normal file
@@ -0,0 +1,320 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useMemo } from 'react'
|
||||
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props accepted by the ColumnControls component. */
interface ColumnControlsProps {
  /** Detection result to display; the component renders nothing while null. */
  columnResult: ColumnResult | null
  /** Saved ground-truth columns; when set, a diff table against the result is shown. */
  savedGtColumns: PageRegion[] | null
  /** Disables the "Erneut erkennen" button while true. */
  isDetecting: boolean
  /** Click handler of the "Erneut erkennen" button. */
  onRerun: () => void
  /** Click handler of the "Manuell markieren" button. */
  onManualMode: () => void
  /** Click handler of the ground-truth edit/enter button. */
  onGtMode: () => void
  /** Called with `{ is_correct }` when the user answers "Ja" or "Nein". */
  onGroundTruth: (gt: ColumnGroundTruth) => void
  /** Click handler of the "Weiter" button. */
  onNext: () => void
}
|
||||
|
||||
// Header and footer regions share the same neutral badge styling.
const HEADER_FOOTER_BADGE = 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400'

/** Tailwind badge classes per region type. */
const TYPE_COLORS: Record<string, string> = {
  column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
  column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
  column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
  column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
  page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
  column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
  column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
  header: HEADER_FOOTER_BADGE,
  footer: HEADER_FOOTER_BADGE,
}

/** Short display label per region type (shown inside the badge). */
const TYPE_LABELS: Record<string, string> = {
  column_en: 'EN',
  column_de: 'DE',
  column_example: 'Beispiel',
  column_text: 'Text',
  page_ref: 'Seite',
  column_marker: 'Marker',
  column_ignore: 'Ignorieren',
  header: 'Header',
  footer: 'Footer',
}

/** Display label per classification method; unknown methods are shown raw. */
const METHOD_LABELS: Record<string, string> = {
  content: 'Inhalt',
  position_enhanced: 'Position',
  position_fallback: 'Fallback',
}
|
||||
|
||||
/** One line of the "Auto vs Ground Truth" comparison table. */
interface DiffRow {
  /** 1-based row number in the order rows were produced. */
  index: number
  /** Automatically detected column, or null when only a GT column exists. */
  autoCol: PageRegion | null
  /** Ground-truth column, or null when only an auto column exists. */
  gtCol: PageRegion | null
  /** gtCol.x - autoCol.x for matched pairs, otherwise null. */
  diffX: number | null
  /** gtCol.width - autoCol.width for matched pairs, otherwise null. */
  diffW: number | null
  /** True when a matched pair has different region types. */
  typeMismatch: boolean
}
|
||||
|
||||
/**
 * Pair automatically detected columns with ground-truth columns by their
 * overlap on the X axis and report the differences.
 *
 * Matching is greedy in `autoCols` order: each auto column takes the not yet
 * used GT column with the highest 1-D IoU (intersection over union of the
 * [x, x + width] intervals), provided that IoU is strictly greater than
 * `minIoU`.
 *
 * Row order of the result: matched pairs first, then unmatched auto columns,
 * then unmatched GT columns. `index` numbers the rows from 1.
 *
 * @param autoCols Automatically detected columns.
 * @param gtCols   Ground-truth columns.
 * @param minIoU   Minimum overlap (exclusive) for a pair to count as a match.
 *                 Defaults to 0.3.
 * @returns One DiffRow per matched pair and per unmatched column.
 */
function computeDiff(autoCols: PageRegion[], gtCols: PageRegion[], minIoU = 0.3): DiffRow[] {
  const rows: DiffRow[] = []
  const usedGt = new Set<number>()
  const usedAuto = new Set<number>()

  // Match auto → GT by best X-axis overlap
  for (let ai = 0; ai < autoCols.length; ai++) {
    const a = autoCols[ai]
    let bestIdx = -1
    let bestIoU = 0

    for (let gi = 0; gi < gtCols.length; gi++) {
      if (usedGt.has(gi)) continue
      const g = gtCols[gi]
      const overlapStart = Math.max(a.x, g.x)
      const overlapEnd = Math.min(a.x + a.width, g.x + g.width)
      // Disjoint intervals yield a negative span; clamp to zero overlap.
      const overlap = Math.max(0, overlapEnd - overlapStart)
      const union = (a.width + g.width) - overlap
      const iou = union > 0 ? overlap / union : 0
      if (iou > bestIoU) {
        bestIoU = iou
        bestIdx = gi
      }
    }

    if (bestIdx >= 0 && bestIoU > minIoU) {
      usedGt.add(bestIdx)
      usedAuto.add(ai)
      const g = gtCols[bestIdx]
      rows.push({
        index: rows.length + 1,
        autoCol: a,
        gtCol: g,
        diffX: g.x - a.x,
        diffW: g.width - a.width,
        typeMismatch: a.type !== g.type,
      })
    }
  }

  // Unmatched auto columns
  for (let ai = 0; ai < autoCols.length; ai++) {
    if (usedAuto.has(ai)) continue
    rows.push({
      index: rows.length + 1,
      autoCol: autoCols[ai],
      gtCol: null,
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  }

  // Unmatched GT columns
  for (let gi = 0; gi < gtCols.length; gi++) {
    if (usedGt.has(gi)) continue
    rows.push({
      index: rows.length + 1,
      autoCol: null,
      gtCol: gtCols[gi],
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  }

  return rows
}
|
||||
|
||||
// Pixel deviation above which a diff value / row is highlighted in amber.
const DIFF_HIGHLIGHT_PX = 20

/** True for regions listed as columns: any `column*` type plus `page_ref`. */
const isContentColumn = (region: PageRegion): boolean =>
  region.type.startsWith('column') || region.type === 'page_ref'

/** Background class of a diff-table row: red for missing/mismatched, amber for large deviations. */
function diffRowClass(row: DiffRow): string {
  if (!row.autoCol || !row.gtCol || row.typeMismatch) {
    return 'bg-red-50 dark:bg-red-900/10'
  }
  const xDeviates = row.diffX !== null && Math.abs(row.diffX) > DIFF_HIGHLIGHT_PX
  const wDeviates = row.diffW !== null && Math.abs(row.diffW) > DIFF_HIGHLIGHT_PX
  return xDeviates || wDeviates ? 'bg-amber-50 dark:bg-amber-900/10' : ''
}

/** Cell content for one side (auto or GT) of a diff row: type badge plus "x, width". */
function renderDiffRegion(region: PageRegion | null) {
  if (!region) {
    return <span className="text-red-400">fehlt</span>
  }
  return (
    <span>
      <span className={`inline-block px-1 rounded ${TYPE_COLORS[region.type] || ''}`}>
        {TYPE_LABELS[region.type] || region.type}
      </span>
      {' '}{region.x}, {region.width}
    </span>
  )
}

/** Cell content for a signed pixel difference; a dash when there is no value. */
function renderDelta(delta: number | null) {
  if (delta === null) return '—'
  return (
    <span className={Math.abs(delta) > DIFF_HIGHLIGHT_PX ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
      {delta > 0 ? '+' : ''}{delta}
    </span>
  )
}

/** Type badge shown in the column / header-footer list. */
function renderTypeBadge(region: PageRegion) {
  return (
    <span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[region.type] || ''}`}>
      {TYPE_LABELS[region.type] || region.type}
    </span>
  )
}

/** Monospace "x= y= WxHpx" geometry readout of a region. */
function renderGeometry(region: PageRegion) {
  return (
    <span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
      x={region.x} y={region.y} {region.width}x{region.height}px
    </span>
  )
}

/**
 * Control panel for the column-detection step: summary and action buttons,
 * the list of detected regions, an optional comparison table against saved
 * ground truth, and the "Spalten korrekt?" feedback plus "Weiter" navigation.
 *
 * Renders nothing while `columnResult` is null.
 */
export function ColumnControls({ columnResult, onRerun, onManualMode, onGtMode, onGroundTruth, onNext, isDetecting, savedGtColumns }: ColumnControlsProps) {
  const [gtSaved, setGtSaved] = useState(false)

  // Comparison rows exist only when both a result and saved ground truth are present.
  const diffRows = useMemo(() => {
    if (!columnResult || !savedGtColumns) return null
    return computeDiff(
      columnResult.columns.filter(isContentColumn),
      savedGtColumns.filter(isContentColumn),
    )
  }, [columnResult, savedGtColumns])

  if (!columnResult) return null

  const columns = columnResult.columns.filter(isContentColumn)
  const headerFooter = columnResult.columns.filter((region: PageRegion) => !isContentColumn(region))

  // Report the yes/no answer and swap the buttons for the "Gespeichert" note.
  const handleGt = (isCorrect: boolean) => {
    onGroundTruth({ is_correct: isCorrect })
    setGtSaved(true)
  }

  return (
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-4">
      {/* Summary */}
      <div className="flex items-center gap-3 flex-wrap">
        <div className="text-sm text-gray-600 dark:text-gray-400">
          <span className="font-medium text-gray-800 dark:text-gray-200">{columns.length} Spalten</span> erkannt
          {columnResult.duration_seconds > 0 && (
            <span className="ml-2 text-xs">({columnResult.duration_seconds}s)</span>
          )}
        </div>
        <button
          onClick={onRerun}
          disabled={isDetecting}
          className="text-xs px-2 py-1 bg-gray-100 dark:bg-gray-700 rounded hover:bg-gray-200 dark:hover:bg-gray-600 transition-colors disabled:opacity-50"
        >
          Erneut erkennen
        </button>
        <button
          onClick={onManualMode}
          className="text-xs px-2 py-1 bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400 rounded hover:bg-teal-200 dark:hover:bg-teal-900/50 transition-colors"
        >
          Manuell markieren
        </button>
        <button
          onClick={onGtMode}
          className="text-xs px-2 py-1 bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400 rounded hover:bg-amber-200 dark:hover:bg-amber-900/50 transition-colors"
        >
          {savedGtColumns ? 'Ground Truth bearbeiten' : 'Ground Truth eintragen'}
        </button>
      </div>

      {/* Column list */}
      <div className="space-y-2">
        {columns.map((col: PageRegion, i: number) => (
          <div key={i} className="flex items-center gap-3 text-sm">
            {renderTypeBadge(col)}
            {col.classification_confidence != null && col.classification_confidence < 1.0 && (
              <span className="text-xs font-medium text-gray-600 dark:text-gray-300">
                {Math.round(col.classification_confidence * 100)}%
              </span>
            )}
            {col.classification_method && (
              <span className="text-xs text-gray-400 dark:text-gray-500">
                ({METHOD_LABELS[col.classification_method] || col.classification_method})
              </span>
            )}
            {renderGeometry(col)}
          </div>
        ))}
        {headerFooter.map((r: PageRegion, i: number) => (
          <div key={`hf-${i}`} className="flex items-center gap-3 text-sm">
            {renderTypeBadge(r)}
            {renderGeometry(r)}
          </div>
        ))}
      </div>

      {/* Diff table (Auto vs GT) */}
      {diffRows && diffRows.length > 0 && (
        <div className="border-t border-gray-100 dark:border-gray-700 pt-3">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
            Vergleich: Auto vs Ground Truth
          </div>
          <div className="overflow-x-auto">
            <table className="w-full text-xs">
              <thead>
                <tr className="text-gray-500 dark:text-gray-400 border-b border-gray-100 dark:border-gray-700">
                  <th className="text-left py-1 pr-2">#</th>
                  <th className="text-left py-1 pr-2">Auto (Typ, x, w)</th>
                  <th className="text-left py-1 pr-2">GT (Typ, x, w)</th>
                  <th className="text-right py-1 pr-2">Diff X</th>
                  <th className="text-right py-1">Diff W</th>
                </tr>
              </thead>
              <tbody>
                {diffRows.map((row) => (
                  <tr key={row.index} className={diffRowClass(row)}>
                    <td className="py-1 pr-2 font-mono text-gray-400">{row.index}</td>
                    <td className="py-1 pr-2 font-mono">
                      {renderDiffRegion(row.autoCol)}
                    </td>
                    <td className="py-1 pr-2 font-mono">
                      {renderDiffRegion(row.gtCol)}
                    </td>
                    <td className="py-1 pr-2 text-right font-mono">
                      {renderDelta(row.diffX)}
                    </td>
                    <td className="py-1 text-right font-mono">
                      {renderDelta(row.diffW)}
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </div>
      )}

      {/* Ground Truth + Navigation */}
      <div className="flex items-center justify-between pt-2 border-t border-gray-100 dark:border-gray-700">
        <div className="flex items-center gap-2">
          <span className="text-sm text-gray-500 dark:text-gray-400">Spalten korrekt?</span>
          {gtSaved ? (
            <span className="text-xs text-green-600 dark:text-green-400">Gespeichert</span>
          ) : (
            <>
              <button
                onClick={() => handleGt(true)}
                className="text-xs px-3 py-1 bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 rounded hover:bg-green-200 dark:hover:bg-green-900/50 transition-colors"
              >
                Ja
              </button>
              <button
                onClick={() => handleGt(false)}
                className="text-xs px-3 py-1 bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400 rounded hover:bg-red-200 dark:hover:bg-red-900/50 transition-colors"
              >
                Nein
              </button>
            </>
          )}
        </div>

        <button
          onClick={onNext}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
        >
          Weiter
        </button>
      </div>
    </div>
  )
}
|
||||
209
admin-lehrer/components/ocr-pipeline/DeskewControls.tsx
Normal file
209
admin-lehrer/components/ocr-pipeline/DeskewControls.tsx
Normal file
@@ -0,0 +1,209 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import type { DeskewResult, DeskewGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props accepted by {@link DeskewControls}. */
interface DeskewControlsProps {
  // ---- data / view state ----

  /** Result of the deskew step; while `null` the panel renders no controls. */
  deskewResult: DeskewResult | null
  /** Whether the "binarized" toggle is currently shown as active. */
  showBinarized: boolean
  /** Whether the "grid" toggle is currently shown as active. */
  showGrid: boolean
  /** While `true` the "apply" button is disabled and shows a placeholder label. */
  isApplying: boolean

  // ---- callbacks ----

  /** Invoked when the binarized toggle is clicked. */
  onToggleBinarized: () => void
  /** Invoked when the grid toggle is clicked. */
  onToggleGrid: () => void
  /** Invoked with the slider value when the manual correction is applied. */
  onManualDeskew: (angle: number) => void
  /** Invoked with the reviewer's ground-truth feedback. */
  onGroundTruth: (gt: DeskewGroundTruth) => void
  /** Invoked when the "continue" button is clicked. */
  onNext: () => void
}
|
||||
|
||||
/**
 * Display labels (German UI text) for the `method_used` value of a deskew
 * result. Values without an entry are shown verbatim by the component.
 */
const METHOD_LABELS: Record<string, string> = {
  hough: 'Hough-Linien',
  word_alignment: 'Wortausrichtung',
  manual: 'Manuell',
}
|
||||
|
||||
/**
 * Control panel for the deskew (rotation) step.
 *
 * Shows the detected angle / method / confidence, two view toggles
 * (binarized image, grid overlay), a manual angle slider (-5 to +5 degrees in
 * 0.1 steps), a ground-truth feedback form and a "continue" button.
 * Nothing but an empty wrapper is rendered while `deskewResult` is `null`.
 *
 * Ground-truth flow:
 *  - "Ja" immediately reports `{ is_correct: true }` and marks feedback saved.
 *  - "Nein" only reveals the notes field; the report is sent when the user
 *    clicks "Feedback speichern". A slider value of exactly 0 is reported as
 *    "no corrected angle" (`undefined`).
 */
export function DeskewControls({
  deskewResult,
  showBinarized,
  onToggleBinarized,
  showGrid,
  onToggleGrid,
  onManualDeskew,
  onGroundTruth,
  onNext,
  isApplying,
}: DeskewControlsProps) {
  const [manualAngle, setManualAngle] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)

  const handleGroundTruth = (isCorrect: boolean) => {
    setGtFeedback(isCorrect ? 'correct' : 'incorrect')
    // A positive verdict needs no further input, so it is submitted right away.
    if (isCorrect) {
      onGroundTruth({ is_correct: true })
      setGtSaved(true)
    }
  }

  const handleGroundTruthIncorrect = () => {
    onGroundTruth({
      is_correct: false,
      corrected_angle: manualAngle !== 0 ? manualAngle : undefined,
      notes: gtNotes || undefined,
    })
    setGtSaved(true)
  }

  /** Shared pill styling for the two view toggles. */
  const toggleClass = (active: boolean) =>
    `text-xs px-3 py-1 rounded-full border transition-colors ${
      active
        ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
        : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
    }`

  return (
    <div className="space-y-4">
      {deskewResult && (
        <>
          {/* Results */}
          <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
            <div className="flex flex-wrap items-center gap-3 text-sm">
              <div>
                <span className="text-gray-500">Winkel:</span>{' '}
                <span className="font-mono font-medium">{deskewResult.angle_applied}°</span>
              </div>
              <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
              <div>
                <span className="text-gray-500">Methode:</span>{' '}
                <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
                  {METHOD_LABELS[deskewResult.method_used] || deskewResult.method_used}
                </span>
              </div>
              <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
              <div>
                <span className="text-gray-500">Konfidenz:</span>{' '}
                <span className="font-mono">{Math.round(deskewResult.confidence * 100)}%</span>
              </div>
              <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
              <div className="text-gray-400 text-xs">
                Hough: {deskewResult.angle_hough}° | WA: {deskewResult.angle_word_alignment}°
              </div>
            </div>

            {/* Toggles */}
            <div className="flex gap-3 mt-3">
              <button
                type="button"
                aria-pressed={showBinarized}
                onClick={onToggleBinarized}
                className={toggleClass(showBinarized)}
              >
                Binarisiert anzeigen
              </button>
              <button
                type="button"
                aria-pressed={showGrid}
                onClick={onToggleGrid}
                className={toggleClass(showGrid)}
              >
                Raster anzeigen
              </button>
            </div>
          </div>

          {/* Manual angle */}
          <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
            <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Manuelle Korrektur</div>
            <div className="flex items-center gap-3">
              <span className="text-xs text-gray-400 w-8 text-right">-5°</span>
              <input
                type="range"
                aria-label="Manuelle Korrektur"
                min={-5}
                max={5}
                step={0.1}
                value={manualAngle}
                onChange={(e) => setManualAngle(parseFloat(e.target.value))}
                className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
              />
              <span className="text-xs text-gray-400 w-8">+5°</span>
              <span className="font-mono text-sm w-14 text-right">{manualAngle.toFixed(1)}°</span>
              <button
                type="button"
                onClick={() => onManualDeskew(manualAngle)}
                disabled={isApplying}
                className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
              >
                {isApplying ? '...' : 'Anwenden'}
              </button>
            </div>
          </div>

          {/* Ground Truth */}
          <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
            <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
              Rotation korrekt?
            </div>
            <p className="text-xs text-gray-400 mb-2">Nur die Drehung bewerten — Woelbung/Verzerrung wird im naechsten Schritt korrigiert.</p>
            {!gtSaved ? (
              <div className="space-y-3">
                <div className="flex gap-2">
                  <button
                    type="button"
                    onClick={() => handleGroundTruth(true)}
                    className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                      gtFeedback === 'correct'
                        ? 'bg-green-100 text-green-700 ring-2 ring-green-400'
                        : 'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300'
                    }`}
                  >
                    Ja
                  </button>
                  <button
                    type="button"
                    onClick={() => handleGroundTruth(false)}
                    className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                      gtFeedback === 'incorrect'
                        ? 'bg-red-100 text-red-700 ring-2 ring-red-400'
                        : 'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300'
                    }`}
                  >
                    Nein
                  </button>
                </div>
                {gtFeedback === 'incorrect' && (
                  <div className="space-y-2">
                    <textarea
                      value={gtNotes}
                      onChange={(e) => setGtNotes(e.target.value)}
                      placeholder="Notizen zur Korrektur..."
                      className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
                      rows={2}
                    />
                    <button
                      type="button"
                      onClick={handleGroundTruthIncorrect}
                      className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
                    >
                      Feedback speichern
                    </button>
                  </div>
                )}
              </div>
            ) : (
              <div className="text-sm text-green-600 dark:text-green-400">
                Feedback gespeichert
              </div>
            )}
          </div>

          {/* Next button */}
          <div className="flex justify-end">
            <button
              type="button"
              onClick={onNext}
              className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
            >
              Uebernehmen & Weiter →
            </button>
          </div>
        </>
      )}
    </div>
  )
}
|
||||
553
admin-lehrer/components/ocr-pipeline/DewarpControls.tsx
Normal file
553
admin-lehrer/components/ocr-pipeline/DewarpControls.tsx
Normal file
@@ -0,0 +1,553 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { DeskewResult, DewarpResult, DewarpDetection, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props accepted by {@link DewarpControls}. */
interface DewarpControlsProps {
  // ---- data / view state ----

  /** Result of the dewarp step; while `null` the panel renders no controls. */
  dewarpResult: DewarpResult | null
  /** Optional deskew result; when present it seeds the three rotation fine-tune sliders. */
  deskewResult?: DeskewResult | null
  /** Whether the grid toggle is currently shown as active. */
  showGrid: boolean
  /** While `true` the apply / preview buttons are disabled. */
  isApplying: boolean

  // ---- callbacks ----

  /** Invoked when the grid toggle is clicked. */
  onToggleGrid: () => void
  /** Invoked with the manual slider value (degrees) when "Anwenden" is clicked. */
  onManualDewarp: (shearDegrees: number) => void
  /**
   * Invoked by the fine-tune preview with the summed rotation and the shear
   * of the selected method. The fine-tune panel is only rendered when this
   * callback is provided.
   */
  onCombinedAdjust?: (rotationDegrees: number, shearDegrees: number) => void
  /** Invoked with the reviewer's ground-truth feedback. */
  onGroundTruth: (gt: DewarpGroundTruth) => void
  /** Invoked when the "continue" button is clicked. */
  onNext: () => void
}
|
||||
|
||||
/**
 * Display labels (German UI text) for dewarp method identifiers. Used both
 * for `method_used` of the overall result and for individual detections;
 * identifiers without an entry are shown verbatim.
 */
const METHOD_LABELS: Record<string, string> = {
  // automatic detectors
  vertical_edge: 'A: Vertikale Kanten',
  projection: 'B: Projektions-Varianz',
  hough_lines: 'C: Hough-Linien',
  text_lines: 'D: Textzeilenanalyse',
  // user-driven / no-op outcomes
  manual: 'Manuell',
  manual_combined: 'Manuell (kombiniert)',
  none: 'Keine Korrektur',
}

/** Detector identifiers that get their own shear slider in the fine-tune panel. */
const SHEAR_METHOD_KEYS = ['vertical_edge', 'projection', 'hough_lines', 'text_lines'] as const
|
||||
|
||||
/**
 * Map a confidence value to Tailwind text-colour classes.
 *
 * @param conf Confidence, nominally in the range 0-1.
 * @returns Green classes for `conf >= 0.7`, yellow for `conf >= 0.5`,
 *          otherwise (including NaN) a neutral gray class.
 */
function confColor(conf: number): string {
  const isHigh = conf >= 0.7
  const isMedium = conf >= 0.5
  return isHigh
    ? 'text-green-600 dark:text-green-400'
    : isMedium
      ? 'text-yellow-600 dark:text-yellow-400'
      : 'text-gray-400'
}
|
||||
|
||||
/**
 * Short horizontal confidence bar followed by the percentage as text.
 *
 * The bar colour uses the same 0.7 / 0.5 thresholds as `confColor`.
 * The fill width is clamped to 0-100 %: a negative or non-finite value would
 * otherwise produce an invalid CSS width, which browsers drop, making the
 * fill expand to the full track. The textual percentage is left unclamped
 * so unexpected values stay visible.
 *
 * @param value Confidence, nominally in the range 0-1.
 */
function ConfBar({ value }: { value: number }) {
  const pct = Math.round(value * 100)
  const fillPct = Number.isFinite(pct) ? Math.min(100, Math.max(0, pct)) : 0
  const bg = value >= 0.7 ? 'bg-green-500' : value >= 0.5 ? 'bg-yellow-500' : 'bg-gray-400'
  return (
    <div className="flex items-center gap-1.5">
      <div className="w-16 h-1.5 bg-gray-200 dark:bg-gray-700 rounded-full overflow-hidden">
        <div className={`h-full rounded-full ${bg}`} style={{ width: `${fillPct}%` }} />
      </div>
      <span className={`text-xs font-mono ${confColor(value)}`}>{pct}%</span>
    </div>
  )
}
|
||||
|
||||
/**
 * A single labelled slider row for fine-tuning, optionally preceded by a
 * radio button that selects this row within a radio group.
 *
 * The native range input operates on integer hundredths (value * 100) and
 * converts back on change, so `onChange` receives values with at most two
 * decimals. The scaled `min` / `max` / `step` are rounded so floating-point
 * noise cannot leak into the input's attributes.
 *
 * @param label         Row label; also used as the accessible name of the inputs.
 * @param value         Current value in display units.
 * @param onChange      Receives the new value in display units.
 * @param min           Lower bound in display units.
 * @param max           Upper bound in display units.
 * @param step          Step size in display units.
 * @param unit          Suffix appended to displayed numbers (defaults to the degree sign).
 * @param radioName     When defined, a radio input with this group name is rendered.
 * @param radioChecked  Checked state of that radio input.
 * @param onRadioChange Invoked when the radio input is selected.
 */
function FineTuneSlider({
  label,
  value,
  onChange,
  min,
  max,
  step,
  unit = '\u00B0',
  radioName,
  radioChecked,
  onRadioChange,
}: {
  label: string
  value: number
  onChange: (v: number) => void
  min: number
  max: number
  step: number
  unit?: string
  radioName?: string
  radioChecked?: boolean
  onRadioChange?: () => void
}) {
  /** Convert a display-unit number to the integer scale used by the range input. */
  const toHundredths = (v: number) => Math.round(v * 100)

  return (
    <div className="flex items-center gap-2">
      {radioName !== undefined && (
        <input
          type="radio"
          name={radioName}
          checked={radioChecked}
          onChange={onRadioChange}
          aria-label={label}
          className="w-3.5 h-3.5 accent-teal-500"
        />
      )}
      <span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">{label}</span>
      <span className="text-xs text-gray-400 w-8 text-right">{min}{unit}</span>
      <input
        type="range"
        aria-label={label}
        min={toHundredths(min)}
        max={toHundredths(max)}
        step={toHundredths(step)}
        value={toHundredths(value)}
        onChange={(e) => onChange(parseInt(e.target.value, 10) / 100)}
        className="flex-1 h-1.5 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
      />
      <span className="text-xs text-gray-400 w-8">+{max}{unit}</span>
      <span className="font-mono text-xs w-14 text-right tabular-nums">
        {value >= 0 ? '+' : ''}{value.toFixed(2)}{unit}
      </span>
    </div>
  )
}
|
||||
|
||||
/**
 * Control panel for the dewarp (shear correction) step.
 *
 * Sections, all hidden while `dewarpResult` is `null`:
 *  1. Summary banner: status line, shear / method / confidence, grid toggle
 *     and an expandable per-detector breakdown.
 *  2. Manual shear slider (-2.00 to +2.00 degrees in 0.05 steps).
 *  3. Fine-tune panel (only when `onCombinedAdjust` is provided): three
 *     rotation sliders whose sum is applied, plus one shear slider per
 *     detector of which exactly one is selected via radio button.
 *  4. Ground-truth feedback form (hidden while the fine-tune panel is open).
 *  5. "Continue" button.
 *
 * Degree signs in JSX text are written as `{'\u00B0'}` expressions: a bare
 * `\u00B0` in JSX text is not an escape sequence and would be displayed
 * literally.
 */
export function DewarpControls({
  dewarpResult,
  deskewResult,
  showGrid,
  onToggleGrid,
  onManualDewarp,
  onCombinedAdjust,
  onGroundTruth,
  onNext,
  isApplying,
}: DewarpControlsProps) {
  const [manualShear, setManualShear] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)
  const [showDetails, setShowDetails] = useState(false)
  const [showFineTune, setShowFineTune] = useState(false)

  // Fine-tuning rotation sliders (3 passes)
  const [p1Iterative, setP1Iterative] = useState(0)
  const [p2Residual, setP2Residual] = useState(0)
  const [p3Textline, setP3Textline] = useState(0)

  // Fine-tuning shear sliders (4 methods) + selected method
  const [shearValues, setShearValues] = useState<Record<string, number>>({
    vertical_edge: 0,
    projection: 0,
    hough_lines: 0,
    text_lines: 0,
  })
  const [selectedShearMethod, setSelectedShearMethod] = useState<string>('vertical_edge')

  // Initialize slider to auto-detected value when result arrives
  useEffect(() => {
    if (dewarpResult && dewarpResult.shear_degrees !== undefined) {
      setManualShear(dewarpResult.shear_degrees)
    }
  }, [dewarpResult?.shear_degrees])

  // Initialize fine-tuning sliders from deskew result
  useEffect(() => {
    if (deskewResult) {
      setP1Iterative(deskewResult.angle_iterative ?? 0)
      setP2Residual(deskewResult.angle_residual ?? 0)
      setP3Textline(deskewResult.angle_textline ?? 0)
    }
  }, [deskewResult])

  // Initialize shear sliders from dewarp detections
  useEffect(() => {
    if (dewarpResult?.detections) {
      const newValues = { ...shearValues }
      let bestMethod = selectedShearMethod
      let bestConf = -1
      for (const d of dewarpResult.detections) {
        // Only detectors that have a slider are considered.
        if (d.method in newValues) {
          newValues[d.method] = d.shear_degrees
          if (d.confidence > bestConf) {
            bestConf = d.confidence
            bestMethod = d.method
          }
        }
      }
      setShearValues(newValues)
      // Select the method that was actually used, or the highest confidence
      if (dewarpResult.method_used && dewarpResult.method_used in newValues) {
        setSelectedShearMethod(dewarpResult.method_used)
      } else {
        setSelectedShearMethod(bestMethod)
      }
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [dewarpResult?.detections])

  const rotationSum = p1Iterative + p2Residual + p3Textline
  const activeShear = shearValues[selectedShearMethod] ?? 0

  const handleGroundTruth = (isCorrect: boolean) => {
    setGtFeedback(isCorrect ? 'correct' : 'incorrect')
    // A positive verdict needs no further input, so it is submitted right away.
    if (isCorrect) {
      onGroundTruth({ is_correct: true })
      setGtSaved(true)
    }
  }

  const handleGroundTruthIncorrect = () => {
    onGroundTruth({
      is_correct: false,
      corrected_shear: manualShear !== 0 ? manualShear : undefined,
      notes: gtNotes || undefined,
    })
    setGtSaved(true)
  }

  const handleShearValueChange = (method: string, value: number) => {
    setShearValues((prev) => ({ ...prev, [method]: value }))
  }

  const handleFineTunePreview = () => {
    if (onCombinedAdjust) {
      onCombinedAdjust(rotationSum, activeShear)
    }
  }

  const detections = dewarpResult?.detections || []
  // An ensemble result lists its member methods joined by '+'.
  const usedMethods = dewarpResult ? dewarpResult.method_used.split('+') : []
  const isManual =
    !!dewarpResult && (dewarpResult.method_used === 'manual' || dewarpResult.method_used === 'manual_combined')
  // Detectors fired but the overall result is "none": the correction was discarded.
  const wasRejected = dewarpResult && dewarpResult.method_used === 'none' && detections.length > 0
  const wasApplied = dewarpResult && dewarpResult.method_used !== 'none' && !isManual

  return (
    <div className="space-y-4">
      {/* Summary banner */}
      {dewarpResult && (
        <div className={`rounded-lg border p-4 ${
          wasRejected
            ? 'bg-amber-50 border-amber-200 dark:bg-amber-900/20 dark:border-amber-700'
            : wasApplied
              ? 'bg-green-50 border-green-200 dark:bg-green-900/20 dark:border-green-700'
              : 'bg-white border-gray-200 dark:bg-gray-800 dark:border-gray-700'
        }`}>
          {/* Status line */}
          <div className="flex items-center gap-2 mb-3">
            <span className="text-lg">
              {wasRejected ? '\u26A0\uFE0F' : wasApplied ? '\u2705' : '\u2796'}
            </span>
            <span className="text-sm font-medium text-gray-800 dark:text-gray-200">
              {wasRejected
                ? 'Quality Gate: Korrektur verworfen (Projektion nicht verbessert)'
                : wasApplied
                  ? `Korrektur angewendet: ${dewarpResult.shear_degrees.toFixed(2)}\u00B0`
                  : isManual
                    ? `Manuelle Korrektur: ${dewarpResult.shear_degrees.toFixed(2)}\u00B0`
                    : 'Keine Korrektur noetig'}
            </span>
          </div>

          {/* Key metrics */}
          <div className="flex flex-wrap items-center gap-4 text-sm">
            <div>
              <span className="text-gray-500">Scherung:</span>{' '}
              <span className="font-mono font-medium">{dewarpResult.shear_degrees.toFixed(2)}{'\u00B0'}</span>
            </div>
            <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
            <div>
              <span className="text-gray-500">Methode:</span>{' '}
              <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
                {usedMethods.length > 1
                  ? `Ensemble (${usedMethods.map(m => METHOD_LABELS[m] || m).join(' + ')})`
                  : METHOD_LABELS[dewarpResult.method_used] || dewarpResult.method_used}
              </span>
            </div>
            <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
            <div className="flex items-center gap-1.5">
              <span className="text-gray-500">Konfidenz:</span>
              <ConfBar value={dewarpResult.confidence} />
            </div>
          </div>

          {/* Toggles row */}
          <div className="flex gap-2 mt-3">
            <button
              type="button"
              onClick={onToggleGrid}
              className={`text-xs px-3 py-1 rounded-full border transition-colors ${
                showGrid
                  ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
                  : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
              }`}
            >
              Raster
            </button>
            {detections.length > 0 && (
              <button
                type="button"
                onClick={() => setShowDetails(v => !v)}
                className={`text-xs px-3 py-1 rounded-full border transition-colors ${
                  showDetails
                    ? 'bg-blue-100 border-blue-300 text-blue-700 dark:bg-blue-900/40 dark:border-blue-600 dark:text-blue-300'
                    : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
                }`}
              >
                Details ({detections.length} Methoden)
              </button>
            )}
          </div>

          {/* Detailed detections */}
          {showDetails && detections.length > 0 && (
            <div className="mt-3 pt-3 border-t border-gray-200 dark:border-gray-700">
              <div className="text-xs text-gray-500 mb-2">Einzelne Detektoren:</div>
              <div className="space-y-1.5">
                {detections.map((d: DewarpDetection) => {
                  // Exact match against the ensemble members rather than a substring test.
                  const isUsed = usedMethods.includes(d.method)
                  const aboveThreshold = d.confidence >= 0.5
                  return (
                    <div
                      key={d.method}
                      className={`flex items-center gap-3 text-xs px-2 py-1.5 rounded ${
                        isUsed
                          ? 'bg-teal-50 dark:bg-teal-900/20'
                          : 'bg-gray-50 dark:bg-gray-800'
                      }`}
                    >
                      <span className="w-4 text-center">
                        {isUsed ? '\u2713' : aboveThreshold ? '\u2012' : '\u2717'}
                      </span>
                      <span className={`w-40 ${isUsed ? 'font-medium text-gray-800 dark:text-gray-200' : 'text-gray-500'}`}>
                        {METHOD_LABELS[d.method] || d.method}
                      </span>
                      <span className="font-mono w-16 text-right">
                        {d.shear_degrees.toFixed(2)}{'\u00B0'}
                      </span>
                      <ConfBar value={d.confidence} />
                      {!aboveThreshold && (
                        <span className="text-gray-400 ml-1">(unter Schwelle)</span>
                      )}
                    </div>
                  )
                })}
              </div>
              {wasRejected && (
                <div className="mt-2 text-xs text-amber-600 dark:text-amber-400">
                  Die Korrektur wurde verworfen, weil die horizontale Projektions-Varianz nach Anwendung nicht besser war als vorher.
                </div>
              )}
            </div>
          )}
        </div>
      )}

      {/* Manual shear angle slider */}
      {dewarpResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Scherwinkel (manuell)</div>
          <div className="flex items-center gap-3">
            <span className="text-xs text-gray-400 w-10 text-right">-2.0{'\u00B0'}</span>
            {/* The range input works on integer hundredths of a degree. */}
            <input
              type="range"
              aria-label="Scherwinkel (manuell)"
              min={-200}
              max={200}
              step={5}
              value={Math.round(manualShear * 100)}
              onChange={(e) => setManualShear(parseInt(e.target.value, 10) / 100)}
              className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
            />
            <span className="text-xs text-gray-400 w-10">+2.0{'\u00B0'}</span>
            <span className="font-mono text-sm w-16 text-right">{manualShear.toFixed(2)}{'\u00B0'}</span>
            <button
              type="button"
              onClick={() => onManualDewarp(manualShear)}
              disabled={isApplying}
              className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
            >
              {isApplying ? '...' : 'Anwenden'}
            </button>
          </div>
          <p className="text-xs text-gray-400 mt-1">
            Scherung der vertikalen Achse in Grad. Positiv = Spalten nach rechts kippen, negativ = nach links.
          </p>
        </div>
      )}

      {/* Fine-tuning panel */}
      {dewarpResult && onCombinedAdjust && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
          <button
            type="button"
            aria-expanded={showFineTune}
            onClick={() => setShowFineTune(v => !v)}
            className="w-full flex items-center justify-between p-4 text-left"
          >
            <div className="flex items-center gap-2">
              <span className="text-sm">⚙️</span>
              <span className="text-sm font-medium text-gray-700 dark:text-gray-300">Feinabstimmung</span>
              <span className="text-xs text-gray-400">(7 Regler)</span>
            </div>
            <span className="text-gray-400 text-sm">{showFineTune ? '\u25B2' : '\u25BC'}</span>
          </button>

          {showFineTune && (
            <div className="px-4 pb-4 space-y-5">
              {/* Rotation section */}
              <div>
                <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider mb-2">
                  Rotation (Begradigung)
                </div>
                <div className="space-y-2">
                  <FineTuneSlider
                    label="P1 Iterative Projection"
                    value={p1Iterative}
                    onChange={setP1Iterative}
                    min={-5}
                    max={5}
                    step={0.05}
                  />
                  <FineTuneSlider
                    label="P2 Word-Alignment"
                    value={p2Residual}
                    onChange={setP2Residual}
                    min={-3}
                    max={3}
                    step={0.05}
                  />
                  <FineTuneSlider
                    label="P3 Textline-Regression"
                    value={p3Textline}
                    onChange={setP3Textline}
                    min={-3}
                    max={3}
                    step={0.05}
                  />
                  <div className="flex items-center gap-2 pt-1 border-t border-gray-100 dark:border-gray-700">
                    <span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">Summe Rotation</span>
                    <span className="font-mono text-sm font-medium text-teal-600 dark:text-teal-400">
                      {rotationSum >= 0 ? '+' : ''}{rotationSum.toFixed(2)}{'\u00B0'}
                    </span>
                  </div>
                </div>
              </div>

              {/* Shear section */}
              <div>
                <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider mb-2">
                  Scherung (Entzerrung) — einen Wert waehlen
                </div>
                <div className="space-y-2">
                  {SHEAR_METHOD_KEYS.map((method) => (
                    <FineTuneSlider
                      key={method}
                      label={METHOD_LABELS[method] || method}
                      value={shearValues[method]}
                      onChange={(v) => handleShearValueChange(method, v)}
                      min={-5}
                      max={5}
                      step={0.05}
                      radioName="shear-method"
                      radioChecked={selectedShearMethod === method}
                      onRadioChange={() => setSelectedShearMethod(method)}
                    />
                  ))}
                  <div className="flex items-center gap-2 pt-1 border-t border-gray-100 dark:border-gray-700">
                    <span className="text-xs text-gray-500 dark:text-gray-400 w-36 shrink-0">Gewaehlte Scherung</span>
                    <span className="font-mono text-sm font-medium text-teal-600 dark:text-teal-400">
                      {activeShear >= 0 ? '+' : ''}{activeShear.toFixed(2)}{'\u00B0'}
                    </span>
                    <span className="text-xs text-gray-400 ml-1">
                      ({METHOD_LABELS[selectedShearMethod]})
                    </span>
                  </div>
                </div>
              </div>

              {/* Preview + Save */}
              <div className="flex items-center gap-3 pt-2">
                <button
                  type="button"
                  onClick={handleFineTunePreview}
                  disabled={isApplying}
                  className="px-4 py-2 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
                >
                  {isApplying ? 'Wird angewendet...' : 'Vorschau'}
                </button>
                <button
                  type="button"
                  onClick={() => {
                    // Fine-tuned values are stored as a correction of the automatic result.
                    onGroundTruth({
                      is_correct: false,
                      corrected_shear: activeShear,
                      notes: `Fine-tuned: rotation=${rotationSum.toFixed(3)}, shear=${activeShear.toFixed(3)} (${selectedShearMethod})`,
                    })
                    setGtSaved(true)
                  }}
                  disabled={gtSaved}
                  className="px-4 py-2 text-sm bg-blue-600 text-white rounded-md hover:bg-blue-700 disabled:opacity-50 transition-colors"
                >
                  {gtSaved ? 'Gespeichert' : 'Als Ground Truth speichern'}
                </button>
                <span className="text-xs text-gray-400">
                  Rotation: {rotationSum >= 0 ? '+' : ''}{rotationSum.toFixed(2)}{'\u00B0'} + Scherung: {activeShear >= 0 ? '+' : ''}{activeShear.toFixed(2)}{'\u00B0'}
                </span>
              </div>
            </div>
          )}
        </div>
      )}

      {/* Ground Truth */}
      {dewarpResult && !showFineTune && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
            Spalten vertikal ausgerichtet?
          </div>
          <p className="text-xs text-gray-400 mb-2">Pruefen ob die Spaltenraender jetzt senkrecht zum Raster stehen.</p>
          {!gtSaved ? (
            <div className="space-y-3">
              <div className="flex gap-2">
                <button
                  type="button"
                  onClick={() => handleGroundTruth(true)}
                  className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                    gtFeedback === 'correct'
                      ? 'bg-green-100 text-green-700 ring-2 ring-green-400'
                      : 'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300'
                  }`}
                >
                  Ja
                </button>
                <button
                  type="button"
                  onClick={() => handleGroundTruth(false)}
                  className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                    gtFeedback === 'incorrect'
                      ? 'bg-red-100 text-red-700 ring-2 ring-red-400'
                      : 'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300'
                  }`}
                >
                  Nein
                </button>
              </div>
              {gtFeedback === 'incorrect' && (
                <div className="space-y-2">
                  <textarea
                    value={gtNotes}
                    onChange={(e) => setGtNotes(e.target.value)}
                    placeholder="Notizen zur Korrektur..."
                    className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
                    rows={2}
                  />
                  <button
                    type="button"
                    onClick={handleGroundTruthIncorrect}
                    className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
                  >
                    Feedback speichern
                  </button>
                </div>
              )}
            </div>
          ) : (
            <div className="text-sm text-green-600 dark:text-green-400">
              Feedback gespeichert
            </div>
          )}
        </div>
      )}

      {/* Next button */}
      {dewarpResult && (
        <div className="flex justify-end">
          <button
            type="button"
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Uebernehmen & Weiter →
          </button>
        </div>
      )}
    </div>
  )
}
|
||||
@@ -0,0 +1,403 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Path prefix under which the OCR pipeline endpoints are requested. */
const KLAUSUR_API = '/klausur-api'

/**
 * Column type → hex colour. The trailing comments name the Tailwind palette
 * entry each hex value corresponds to.
 */
const COL_TYPE_COLORS: Record<string, string> = {
  column_en: '#3b82f6', // blue-500
  column_de: '#22c55e', // green-500
  column_example: '#f97316', // orange-500
  column_text: '#a855f7', // purple-500
  page_ref: '#06b6d4', // cyan-500
  column_marker: '#6b7280', // gray-500
}
|
||||
|
||||
/** Props accepted by {@link FabricReconstructionCanvas}. */
interface FabricReconstructionCanvasProps {
  /** Pipeline session id; used to build the URL of the cropped background image. */
  sessionId: string
  /** Grid cells that are added to the canvas as objects. */
  cells: GridCell[]
  /** Invoked with the changed cell id and its new text after a cell object was modified. */
  onCellsChanged: (updates: { cell_id: string; text: string }[]) => void
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// Fabric.js types (subset used here)
//
// The library is loaded lazily via dynamic import, so only the members this
// component touches are described structurally instead of importing the
// library's own typings.
// ---------------------------------------------------------------------------

/** Generic drawable object on the canvas. */
interface FabricObject {
  type?: string
  left?: number
  top?: number
  width?: number
  height?: number
  text?: string
  selectable?: boolean
  /** Free-form payload attached to the object. */
  data?: Record<string, unknown>
  set: (props: Record<string, unknown>) => FabricObject
  get: (prop: string) => unknown
  on?: (event: string, handler: () => void) => void
  setCoords?: () => void
}

/** Image object; `width` / `height` are inherited from {@link FabricObject}. */
interface FabricImage extends FabricObject {
  scaleX?: number
  scaleY?: number
}

/** Payload passed to canvas event handlers. */
interface FabricEvent {
  target?: FabricObject
  e?: MouseEvent
}

/** The canvas instance. */
interface FabricCanvas {
  // object management
  add: (...objects: FabricObject[]) => FabricCanvas
  remove: (...objects: FabricObject[]) => FabricCanvas
  getObjects: () => FabricObject[]
  getActiveObject: () => FabricObject | null
  discardActiveObject: () => FabricCanvas

  // rendering / geometry
  setBackgroundImage: (img: FabricImage, callback: () => void) => void
  renderAll: () => void
  requestRenderAll: () => void
  setWidth: (w: number) => void
  setHeight: (h: number) => void
  setZoom: (z: number) => void
  getZoom: () => number

  // lifecycle / events
  on: (event: string, handler: (e: FabricEvent) => void) => void
  dispose: () => void
}

/** The dynamically imported module itself is left untyped. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type FabricModule = any
|
||||
|
||||
/** Input types that do not accept typed text; shortcuts stay active while they have focus. */
const NON_TEXT_INPUT_TYPES = new Set([
  'range', 'checkbox', 'radio', 'button', 'submit', 'reset', 'color', 'file', 'image',
])

/**
 * True when a key event comes from an element the user is typing into
 * (text input, textarea, contenteditable). Canvas shortcuts must not fire then,
 * otherwise Backspace/Cmd+Z in an unrelated field would delete or revert a cell.
 */
function isTextEntryTarget(target: EventTarget | null): boolean {
  if (!(target instanceof HTMLElement)) return false
  if (target.isContentEditable || target instanceof HTMLTextAreaElement) return true
  return target instanceof HTMLInputElement && !NON_TEXT_INPUT_TYPES.has(target.type)
}

/**
 * Apply a zoom factor through Fabric's own API and resize the canvas element so
 * zoomed content is not clipped. `base` is the unzoomed canvas size.
 */
function applyCanvasZoom(
  canvas: FabricCanvas,
  base: { width: number; height: number } | null,
  factor: number,
) {
  const zoomable = canvas as unknown as {
    setZoom?: (value: number) => void
    setDimensions?: (size: { width: number; height: number }) => void
  }
  zoomable.setZoom?.(factor)
  if (base) {
    zoomable.setDimensions?.({ width: base.width * factor, height: base.height * factor })
  }
}

/**
 * Fabric.js based editor that overlays editable OCR cell texts on the cropped
 * page image of a session.
 *
 * @param sessionId      Session whose cropped image is loaded as canvas background.
 * @param cells          Grid cells rendered as editable text objects (read once per session).
 * @param onCellsChanged Called with `{ cell_id, text }` entries whenever a cell text
 *                       changes through editing, undo or redo.
 */
export function FabricReconstructionCanvas({
  sessionId,
  cells,
  onCellsChanged,
}: FabricReconstructionCanvasProps) {
  const canvasElRef = useRef<HTMLCanvasElement>(null)
  const fabricRef = useRef<FabricCanvas | null>(null)
  const fabricModuleRef = useRef<FabricModule>(null)
  // Unzoomed canvas size, needed to resize the element when the zoom changes.
  const baseSizeRef = useRef<{ width: number; height: number } | null>(null)
  // Latest callback; the Fabric handlers live as long as the canvas and would
  // otherwise keep calling the callback captured when the canvas was created.
  const onCellsChangedRef = useRef(onCellsChanged)
  const [ready, setReady] = useState(false)
  const [opacity, setOpacity] = useState(30)
  const [zoom, setZoom] = useState(100)
  const [selectedCell, setSelectedCell] = useState<string | null>(null)
  const [error, setError] = useState('')

  // Undo/Redo
  const undoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])
  const redoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])

  useEffect(() => {
    onCellsChangedRef.current = onCellsChanged
  }, [onCellsChanged])

  // ---- Initialise Fabric.js ----
  useEffect(() => {
    let disposed = false

    async function init() {
      try {
        const fabricModule = await import('fabric')
        if (disposed) return
        fabricModuleRef.current = fabricModule

        const canvasEl = canvasElRef.current
        if (!canvasEl) return

        // Load background image first to get dimensions
        const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`

        const bgImg = await fabricModule.FabricImage.fromURL(imgUrl, { crossOrigin: 'anonymous' }) as FabricImage

        if (disposed) return

        const imgW = (bgImg.width || 800) * (bgImg.scaleX || 1)
        const imgH = (bgImg.height || 600) * (bgImg.scaleY || 1)
        baseSizeRef.current = { width: imgW, height: imgH }

        bgImg.set({ opacity: opacity / 100, selectable: false, evented: false } as Record<string, unknown>)

        const canvas = new fabricModule.Canvas(canvasEl, {
          width: imgW,
          height: imgH,
          selection: true,
          preserveObjectStacking: true,
          backgroundImage: bgImg,
        }) as unknown as FabricCanvas

        fabricRef.current = canvas
        // Keep the canvas in sync with the zoom shown in the toolbar when a new
        // session is loaded while a non-default zoom is active.
        if (zoom !== 100) applyCanvasZoom(canvas, baseSizeRef.current, zoom / 100)
        canvas.renderAll()

        // Add cell objects
        addCellObjects(canvas, fabricModule, cells, imgW, imgH)

        // Listen for text changes
        canvas.on('object:modified', (e: FabricEvent) => {
          const target = e.target
          const data = target?.data
          if (!target || !data?.cellId) return

          const cellId = data.cellId as string
          const newText = (target.text || '') as string
          const oldText = (data.lastText ?? data.originalText ?? '') as string

          // object:modified also fires for move/scale; only real text edits
          // become undoable steps (and invalidate the redo history).
          if (newText !== oldText) {
            undoStackRef.current.push({ cellId, oldText, newText })
            redoStackRef.current.length = 0
            data.lastText = newText
          }
          onCellsChangedRef.current([{ cell_id: cellId, text: newText }])
        })

        // Selection tracking: read the active object from the canvas instead of
        // the event payload, whose shape differs between Fabric versions.
        const syncSelection = () => {
          const cellId = canvas.getActiveObject()?.data?.cellId
          setSelectedCell(typeof cellId === 'string' ? cellId : null)
        }
        canvas.on('selection:created', syncSelection)
        canvas.on('selection:updated', syncSelection)
        canvas.on('selection:cleared', () => setSelectedCell(null))

        setReady(true)
      } catch (err) {
        if (!disposed) setError(err instanceof Error ? err.message : 'Fabric.js konnte nicht geladen werden')
      }
    }

    init()

    return () => {
      disposed = true
      fabricRef.current?.dispose()
      fabricRef.current = null
      baseSizeRef.current = null
      // History entries refer to objects of the disposed canvas.
      undoStackRef.current = []
      redoStackRef.current = []
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /** Create one editable IText per grid cell, positioned from its percentage bbox. */
  function addCellObjects(
    canvas: FabricCanvas,
    fabricModule: FabricModule,
    gridCells: GridCell[],
    imgW: number,
    imgH: number,
  ) {
    for (const cell of gridCells) {
      const color = COL_TYPE_COLORS[cell.col_type] || '#6b7280'
      const x = (cell.bbox_pct.x / 100) * imgW
      const y = (cell.bbox_pct.y / 100) * imgH
      const w = (cell.bbox_pct.w / 100) * imgW
      const h = (cell.bbox_pct.h / 100) * imgH

      // Font size follows the cell height, clamped to 8..18 px
      const fontSize = Math.max(8, Math.min(18, h * 0.55))

      const textObj = new fabricModule.IText(cell.text || '', {
        left: x,
        top: y,
        width: w,
        fontSize,
        fontFamily: 'monospace',
        fill: '#000000',
        backgroundColor: `${color}22`,
        padding: 2,
        editable: true,
        selectable: true,
        lockScalingFlip: true,
        data: {
          cellId: cell.cell_id,
          colType: cell.col_type,
          rowIndex: cell.row_index,
          colIndex: cell.col_index,
          originalText: cell.text,
        },
      })

      // Border colour matches column type
      textObj.set({
        borderColor: color,
        cornerColor: color,
        cornerSize: 6,
        transparentCorners: false,
      } as Record<string, unknown>)

      canvas.add(textObj)
    }
    canvas.renderAll()
  }

  // ---- Opacity slider ----
  const handleOpacityChange = useCallback((val: number) => {
    setOpacity(val)
    const canvas = fabricRef.current
    if (!canvas) return
    // Fabric v6: backgroundImage is a direct property on the canvas
    const bgImg = (canvas as unknown as { backgroundImage?: FabricObject }).backgroundImage
    if (bgImg) {
      bgImg.set({ opacity: val / 100 })
      canvas.renderAll()
    }
  }, [])

  // ---- Zoom ----
  const handleZoomChange = useCallback((val: number) => {
    setZoom(val)
    const canvas = fabricRef.current
    if (!canvas) return
    applyCanvasZoom(canvas, baseSizeRef.current, val / 100)
    canvas.requestRenderAll()
  }, [])

  // ---- Undo / Redo via keyboard ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      // With Shift held the reported key is the upper-case 'Z'.
      if (!(e.metaKey || e.ctrlKey) || e.key.toLowerCase() !== 'z') return
      // Leave native undo alone while the user types in a text field.
      if (isTextEntryTarget(e.target)) return
      e.preventDefault()

      const canvas = fabricRef.current
      if (!canvas) return

      const isRedo = e.shiftKey
      const source = isRedo ? redoStackRef.current : undoStackRef.current
      const sink = isRedo ? undoStackRef.current : redoStackRef.current

      const action = source.pop()
      if (!action) return
      sink.push(action)

      const text = isRedo ? action.newText : action.oldText
      const obj = canvas.getObjects().find(
        (o: FabricObject) => o.data?.cellId === action.cellId
      )
      if (obj) {
        obj.set({ text } as Record<string, unknown>)
        // Keep the edit baseline in sync so the next manual edit records the right oldText.
        if (obj.data) obj.data.lastText = text
        canvas.renderAll()
        onCellsChanged([{ cell_id: action.cellId, text }])
      }
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [onCellsChanged])

  // ---- Delete selected cell (via context-menu or Delete key) ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.key !== 'Delete' && e.key !== 'Backspace') return
      // Never delete a cell because the user pressed Backspace in some text field.
      if (isTextEntryTarget(e.target)) return
      const canvas = fabricRef.current
      if (!canvas) return
      const active = canvas.getActiveObject()
      if (!active) return
      // If the IText is in editing mode, let the keypress pass through
      if ((active as unknown as Record<string, boolean>).isEditing) return
      e.preventDefault()
      canvas.remove(active)
      canvas.discardActiveObject()
      canvas.renderAll()
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [])

  // ---- Export helpers ----
  const handleExportPdf = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/pdf`,
      '_blank'
    )
  }, [sessionId])

  const handleExportDocx = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/docx`,
      '_blank'
    )
  }, [sessionId])

  if (error) {
    return (
      <div className="flex flex-col items-center justify-center py-8 text-red-500 text-sm">
        <p>Fabric.js Editor konnte nicht geladen werden:</p>
        <p className="text-xs mt-1 text-gray-400">{error}</p>
      </div>
    )
  }

  return (
    <div className="space-y-2">
      {/* Toolbar */}
      <div className="flex items-center gap-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2 text-xs">
        {/* Opacity slider */}
        <label className="flex items-center gap-1.5 text-gray-500">
          Hintergrund
          <input
            type="range"
            min={0} max={100}
            value={opacity}
            onChange={e => handleOpacityChange(Number(e.target.value))}
            className="w-20 h-1 accent-teal-500"
          />
          <span className="w-8 text-right">{opacity}%</span>
        </label>

        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />

        {/* Zoom: a plain div, because a <label> would forward clicks on its text to the first button */}
        <div className="flex items-center gap-1.5 text-gray-500">
          Zoom
          <button onClick={() => handleZoomChange(Math.max(25, zoom - 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            −
          </button>
          <span className="w-8 text-center">{zoom}%</span>
          <button onClick={() => handleZoomChange(Math.min(200, zoom + 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            +
          </button>
          <button onClick={() => handleZoomChange(100)}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            Fit
          </button>
        </div>

        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />

        {/* Selected cell info */}
        {selectedCell && (
          <span className="text-gray-400">
            Zelle: <span className="text-gray-600 dark:text-gray-300">{selectedCell}</span>
          </span>
        )}

        <div className="flex-1" />

        {/* Export buttons */}
        <button onClick={handleExportPdf}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          PDF
        </button>
        <button onClick={handleExportDocx}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          DOCX
        </button>
      </div>

      {/* Canvas */}
      <div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900"
        style={{ maxHeight: '75vh' }}>
        {!ready && (
          <div className="flex items-center justify-center py-12">
            <div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
            <span className="ml-2 text-sm text-gray-500">Canvas wird geladen...</span>
          </div>
        )}
        <canvas ref={canvasElRef} />
      </div>

      {/* Legend */}
      <div className="flex items-center gap-4 text-xs text-gray-500">
        {Object.entries(COL_TYPE_COLORS).map(([type, color]) => (
          <span key={type} className="flex items-center gap-1">
            <span className="w-3 h-3 rounded" style={{ backgroundColor: color + '44', border: `1px solid ${color}` }} />
            {type.replace('column_', '').replace('page_', '')}
          </span>
        ))}
        <span className="ml-auto text-gray-400">Doppelklick = Text bearbeiten | Delete = Zelle entfernen | Cmd+Z = Undo</span>
      </div>
    </div>
  )
}
|
||||
143
admin-lehrer/components/ocr-pipeline/ImageCompareView.tsx
Normal file
143
admin-lehrer/components/ocr-pipeline/ImageCompareView.tsx
Normal file
@@ -0,0 +1,143 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
|
||||
// A4 page size in millimetres; the grid overlay spaces its lines against these.
const A4_WIDTH_MM = 210
const A4_HEIGHT_MM = 297

/** Props of the side-by-side image comparison. */
interface ImageCompareViewProps {
  /** Image shown in the left panel; null renders a placeholder. */
  originalUrl: string | null
  /** Image shown in the right panel unless the binarized variant is active. */
  deskewedUrl: string | null
  /** Shown in the right panel instead of deskewedUrl when showBinarized is set and this is non-null. */
  binarizedUrl: string | null
  showBinarized: boolean
  /** Draw the millimetre grid over the right image. */
  showGrid: boolean
  /** Draw the millimetre grid over the left image. */
  showGridLeft?: boolean
  /** Optional heading overrides for the two panels. */
  leftLabel?: string
  rightLabel?: string
}
|
||||
|
||||
/**
 * SVG overlay that draws a 10 mm grid in a 0..100 viewBox stretched over its parent.
 * Every 50 mm line is drawn stronger and (except at 0) labelled with its mm value.
 */
function MmGridOverlay() {
  const STRONG = 'rgba(59, 130, 246, 0.4)'
  const FAINT = 'rgba(59, 130, 246, 0.15)'

  // 0, 10, 20, ... up to and including the last multiple of 10 that is <= maxMm
  const ticksUpTo = (maxMm: number): number[] =>
    Array.from({ length: Math.floor(maxMm / 10) + 1 }, (_, step) => step * 10)

  // Vertical lines every 10mm, labels along the top edge
  const vertical: React.ReactNode[] = ticksUpTo(A4_WIDTH_MM).flatMap(mm => {
    const xPos = (mm / A4_WIDTH_MM) * 100
    const major = mm % 50 === 0
    const parts: React.ReactNode[] = [
      <line
        key={`v-${mm}`}
        x1={xPos} y1={0} x2={xPos} y2={100}
        stroke={major ? STRONG : FAINT}
        strokeWidth={major ? 0.12 : 0.05}
      />,
    ]
    if (major && mm > 0) {
      parts.push(
        <text key={`vl-${mm}`} x={xPos} y={1.2} fill="rgba(59,130,246,0.6)" fontSize="1.2" textAnchor="middle">
          {mm}
        </text>
      )
    }
    return parts
  })

  // Horizontal lines every 10mm, labels along the left edge
  const horizontal: React.ReactNode[] = ticksUpTo(A4_HEIGHT_MM).flatMap(mm => {
    const yPos = (mm / A4_HEIGHT_MM) * 100
    const major = mm % 50 === 0
    const parts: React.ReactNode[] = [
      <line
        key={`h-${mm}`}
        x1={0} y1={yPos} x2={100} y2={yPos}
        stroke={major ? STRONG : FAINT}
        strokeWidth={major ? 0.12 : 0.05}
      />,
    ]
    if (major && mm > 0) {
      parts.push(
        <text key={`hl-${mm}`} x={0.5} y={yPos + 0.6} fill="rgba(59,130,246,0.6)" fontSize="1.2">
          {mm}
        </text>
      )
    }
    return parts
  })

  const lines: React.ReactNode[] = [...vertical, ...horizontal]

  return (
    <svg
      viewBox="0 0 100 100"
      preserveAspectRatio="none"
      className="absolute inset-0 w-full h-full pointer-events-none"
      style={{ zIndex: 10 }}
    >
      <g style={{ pointerEvents: 'none' }}>{lines}</g>
    </svg>
  )
}
|
||||
|
||||
/**
 * Two-panel comparison: the original scan on the left, the processed image
 * (deskewed, or binarized when requested and available) on the right, each
 * with an optional millimetre grid overlay.
 *
 * Load errors are remembered per URL rather than as a plain flag, so a failed
 * image no longer blocks the panel once a different URL is supplied (e.g. after
 * reprocessing or when toggling the binarized view).
 */
export function ImageCompareView({
  originalUrl,
  deskewedUrl,
  showGrid,
  showGridLeft,
  showBinarized,
  binarizedUrl,
  leftLabel,
  rightLabel,
}: ImageCompareViewProps) {
  // URL whose load failed most recently, per panel (null = no failure recorded)
  const [failedLeftUrl, setFailedLeftUrl] = useState<string | null>(null)
  const [failedRightUrl, setFailedRightUrl] = useState<string | null>(null)

  const rightUrl = showBinarized && binarizedUrl ? binarizedUrl : deskewedUrl

  // An error only counts while the panel still points at the URL that failed.
  const leftError = originalUrl !== null && failedLeftUrl === originalUrl
  const rightError = rightUrl !== null && failedRightUrl === rightUrl

  return (
    <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
      {/* Left: Original */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">{leftLabel || 'Original (unbearbeitet)'}</h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
          style={{ aspectRatio: '210/297' }}>
          {originalUrl && !leftError ? (
            <>
              <img
                src={originalUrl}
                alt="Original Scan"
                className="w-full h-full object-contain"
                onError={() => setFailedLeftUrl(originalUrl)}
              />
              {showGridLeft && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {leftError ? 'Fehler beim Laden' : 'Noch kein Bild'}
            </div>
          )}
        </div>
      </div>

      {/* Right: Deskewed with Grid */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">
          {rightLabel || `${showBinarized ? 'Binarisiert' : 'Begradigt'}${showGrid ? ' + Raster (mm)' : ''}`}
        </h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
          style={{ aspectRatio: '210/297' }}>
          {rightUrl && !rightError ? (
            <>
              <img
                src={rightUrl}
                alt={rightLabel || 'Bearbeitetes Bild'}
                className="w-full h-full object-contain"
                onError={() => setFailedRightUrl(rightUrl)}
              />
              {showGrid && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {rightError ? 'Fehler beim Laden' : `${rightLabel || 'Verarbeitung'} laeuft...`}
            </div>
          )}
        </div>
      </div>
    </div>
  )
}
|
||||
359
admin-lehrer/components/ocr-pipeline/ManualColumnEditor.tsx
Normal file
359
admin-lehrer/components/ocr-pipeline/ManualColumnEditor.tsx
Normal file
@@ -0,0 +1,359 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { ColumnTypeKey, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Options offered in the per-column type dropdown, in display order. */
const COLUMN_TYPES: { value: ColumnTypeKey; label: string }[] = [
  { value: 'column_en', label: 'EN' },
  { value: 'column_de', label: 'DE' },
  { value: 'column_example', label: 'Beispiel' },
  { value: 'column_text', label: 'Text' },
  { value: 'page_ref', label: 'Seite' },
  { value: 'column_marker', label: 'Marker' },
  { value: 'column_ignore', label: 'Ignorieren' },
]

/** Translucent fill drawn over a column region on the image, keyed by column type. */
const TYPE_OVERLAY_COLORS: Record<string, string> = {
  column_en: 'rgba(59, 130, 246, 0.12)',
  column_de: 'rgba(34, 197, 94, 0.12)',
  column_example: 'rgba(249, 115, 22, 0.12)',
  column_text: 'rgba(6, 182, 212, 0.12)',
  page_ref: 'rgba(168, 85, 247, 0.12)',
  column_marker: 'rgba(239, 68, 68, 0.12)',
  column_ignore: 'rgba(128, 128, 128, 0.06)',
}

/** Tailwind classes of the "Spalte n" badge in the type list, keyed by column type. */
const TYPE_BADGE_COLORS: Record<string, string> = {
  column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
  column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
  column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
  column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
  page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
  column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
  column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
}

// Type given to the n-th column when it is first created; columns beyond this
// list fall back to 'column_text' at the call sites.
const DEFAULT_TYPE_SEQUENCE: ColumnTypeKey[] = [
  'page_ref',
  'column_en',
  'column_de',
  'column_example',
  'column_text',
]

// Dividers must keep at least this distance (in percent of the image width)
// from each other and from the image edges.
const MIN_DIVIDER_DISTANCE_PERCENT = 2

/** Props of the manual column editor. */
interface ManualColumnEditorProps {
  imageUrl: string
  /** Image size used to convert divider percentages into the emitted regions. */
  imageWidth: number
  imageHeight: number
  /** Receives the regions built from the dividers when the user confirms. */
  onApply: (columns: PageRegion[]) => void
  onCancel: () => void
  /** While true the apply button is disabled and shows a saving label. */
  applying: boolean
  /** Only changes the wording of the apply button. */
  mode?: 'manual' | 'ground-truth'
  /** 'stacked' puts the controls below the image instead of beside it. */
  layout?: 'two-column' | 'stacked'
  /** Initial divider positions in percent of the image width. */
  initialDividers?: number[]
  /** Initial type per column, left to right. */
  initialColumnTypes?: ColumnTypeKey[]
}
|
||||
|
||||
/**
 * Lets the user place vertical dividers on the page image and assign a type to
 * each resulting column. Dividers are stored as percentages of the image width
 * and are always kept sorted ascending.
 *
 * Column types stay attached to their region: splitting a column inserts the
 * new type next to it, and removing a divider drops the type of the column to
 * its right, instead of always adding/removing the last type.
 */
export function ManualColumnEditor({
  imageUrl,
  imageWidth,
  imageHeight,
  onApply,
  onCancel,
  applying,
  mode = 'manual',
  layout = 'two-column',
  initialDividers,
  initialColumnTypes,
}: ManualColumnEditorProps) {
  const containerRef = useRef<HTMLDivElement>(null)
  // Set when a drag ends: the browser dispatches a click for the same gesture,
  // which must not be interpreted as "add a divider here".
  const suppressClickRef = useRef(false)
  // Sorted up front because divider handles address this array by sorted index.
  const [dividers, setDividers] = useState<number[]>(
    () => [...(initialDividers ?? [])].sort((a, b) => a - b)
  )
  const [columnTypes, setColumnTypes] = useState<ColumnTypeKey[]>(initialColumnTypes ?? [])
  const [dragging, setDragging] = useState<number | null>(null)
  const [imageLoaded, setImageLoaded] = useState(false)

  const isGT = mode === 'ground-truth'

  // Safety net: keep columnTypes at dividers.length + 1 entries (covers the
  // initial state and "remove all"); targeted updates happen in the handlers.
  useEffect(() => {
    const numColumns = dividers.length + 1
    setColumnTypes(prev => {
      if (prev.length === numColumns) return prev
      const next = [...prev]
      while (next.length < numColumns) {
        const idx = next.length
        next.push(DEFAULT_TYPE_SEQUENCE[idx] || 'column_text')
      }
      while (next.length > numColumns) {
        next.pop()
      }
      return next
    })
  }, [dividers.length])

  /** Horizontal position of a client X coordinate inside the image, clamped to 0..100 percent. */
  const getXPercent = useCallback((clientX: number): number => {
    if (!containerRef.current) return 0
    const rect = containerRef.current.getBoundingClientRect()
    const pct = ((clientX - rect.left) / rect.width) * 100
    return Math.max(0, Math.min(100, pct))
  }, [])

  /** True when xPct keeps the minimum distance to all other dividers and to both edges. */
  const canPlaceDivider = useCallback((xPct: number, excludeIndex?: number): boolean => {
    for (let i = 0; i < dividers.length; i++) {
      if (i === excludeIndex) continue
      if (Math.abs(dividers[i] - xPct) < MIN_DIVIDER_DISTANCE_PERCENT) return false
    }
    return xPct > MIN_DIVIDER_DISTANCE_PERCENT && xPct < (100 - MIN_DIVIDER_DISTANCE_PERCENT)
  }, [dividers])

  // Click on image to add a divider
  const handleImageClick = useCallback((e: React.MouseEvent) => {
    if (suppressClickRef.current) {
      suppressClickRef.current = false
      return
    }
    if (dragging !== null) return
    // Don't add if clicking on a divider handle
    if ((e.target as HTMLElement).dataset.divider) return

    const xPct = getXPercent(e.clientX)
    if (!canPlaceDivider(xPct)) return

    // Index of the column being split = number of dividers left of the click
    const splitIndex = dividers.filter(d => d < xPct).length

    setDividers(prev => [...prev, xPct].sort((a, b) => a - b))
    setColumnTypes(prev => {
      // The left part keeps the existing type; the new right part gets the next default.
      const next = [...prev]
      next.splice(splitIndex + 1, 0, DEFAULT_TYPE_SEQUENCE[prev.length] || 'column_text')
      return next
    })
  }, [dragging, dividers, getXPercent, canPlaceDivider])

  // Drag handlers
  const handleDividerMouseDown = useCallback((e: React.MouseEvent, index: number) => {
    e.stopPropagation()
    e.preventDefault()
    setDragging(index)
  }, [])

  useEffect(() => {
    if (dragging === null) return

    const handleMouseMove = (e: MouseEvent) => {
      const xPct = getXPercent(e.clientX)
      if (!canPlaceDivider(xPct, dragging)) return
      setDividers(prev => {
        // Never let a divider pass its neighbours: that would reorder the array
        // and make the drag index point at a different divider.
        const lower = prev[dragging - 1] ?? Number.NEGATIVE_INFINITY
        const upper = prev[dragging + 1] ?? Number.POSITIVE_INFINITY
        if (xPct <= lower || xPct >= upper) return prev
        const next = [...prev]
        next[dragging] = xPct
        return next
      })
    }

    const handleMouseUp = () => {
      setDragging(null)
      suppressClickRef.current = true
      // Clear the flag once the click of this gesture (if any) has been delivered.
      window.setTimeout(() => { suppressClickRef.current = false }, 0)
    }

    window.addEventListener('mousemove', handleMouseMove)
    window.addEventListener('mouseup', handleMouseUp)
    return () => {
      window.removeEventListener('mousemove', handleMouseMove)
      window.removeEventListener('mouseup', handleMouseUp)
    }
  }, [dragging, getXPercent, canPlaceDivider])

  const removeDivider = useCallback((index: number) => {
    setDividers(prev => prev.filter((_, i) => i !== index))
    // Columns index and index + 1 merge; the merged column keeps the left type.
    setColumnTypes(prev => prev.filter((_, i) => i !== index + 1))
  }, [])

  const updateColumnType = useCallback((colIndex: number, type: ColumnTypeKey) => {
    setColumnTypes(prev => {
      const next = [...prev]
      next[colIndex] = type
      return next
    })
  }, [])

  const handleApply = useCallback(() => {
    // Build PageRegion array from dividers
    const sorted = [...dividers].sort((a, b) => a - b)
    const columns: PageRegion[] = []

    for (let i = 0; i <= sorted.length; i++) {
      const leftPct = i === 0 ? 0 : sorted[i - 1]
      const rightPct = i === sorted.length ? 100 : sorted[i]
      // Round both edges and derive the width from them, so neighbouring
      // columns share an edge exactly (no 1px gaps or overlaps).
      const x = Math.round((leftPct / 100) * imageWidth)
      const right = Math.round((rightPct / 100) * imageWidth)

      columns.push({
        type: columnTypes[i] || 'column_text',
        x,
        y: 0,
        width: right - x,
        height: imageHeight,
        classification_confidence: 1.0,
        classification_method: 'manual',
      })
    }

    onApply(columns)
  }, [dividers, columnTypes, imageWidth, imageHeight, onApply])

  // Compute column regions for overlay
  const sorted = [...dividers].sort((a, b) => a - b)
  const columnRegions = Array.from({ length: sorted.length + 1 }, (_, i) => ({
    leftPct: i === 0 ? 0 : sorted[i - 1],
    rightPct: i === sorted.length ? 100 : sorted[i],
    type: columnTypes[i] || 'column_text',
  }))

  return (
    <div className="space-y-4">
      {/* Layout: image + controls */}
      <div className={layout === 'stacked' ? 'space-y-4' : 'grid grid-cols-2 gap-4'}>
        {/* Left: Interactive image */}
        <div>
          <div className="flex items-center justify-between mb-1">
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400">
              Klicken um Trennlinien zu setzen
            </div>
            <button
              onClick={onCancel}
              className="text-xs px-2 py-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200"
            >
              Abbrechen
            </button>
          </div>
          <div
            ref={containerRef}
            className="relative border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 cursor-crosshair select-none"
            onClick={handleImageClick}
          >
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              src={imageUrl}
              alt="Entzerrtes Bild"
              className="w-full h-auto block"
              draggable={false}
              onLoad={() => setImageLoaded(true)}
            />

            {imageLoaded && (
              <>
                {/* Column overlays */}
                {columnRegions.map((region, i) => (
                  <div
                    key={`col-${i}`}
                    className="absolute top-0 bottom-0 pointer-events-none"
                    style={{
                      left: `${region.leftPct}%`,
                      width: `${region.rightPct - region.leftPct}%`,
                      backgroundColor: TYPE_OVERLAY_COLORS[region.type] || 'rgba(128,128,128,0.08)',
                    }}
                  >
                    <span className="absolute top-1 left-1/2 -translate-x-1/2 text-[10px] font-medium text-gray-600 dark:text-gray-300 bg-white/80 dark:bg-gray-800/80 px-1 rounded">
                      {i + 1}
                    </span>
                  </div>
                ))}

                {/* Divider lines */}
                {sorted.map((xPct, i) => (
                  <div
                    key={`div-${i}`}
                    data-divider="true"
                    className="absolute top-0 bottom-0 group"
                    style={{
                      left: `${xPct}%`,
                      transform: 'translateX(-50%)',
                      width: '12px',
                      cursor: 'col-resize',
                      zIndex: 10,
                    }}
                    onMouseDown={(e) => handleDividerMouseDown(e, i)}
                  >
                    {/* Visible line */}
                    <div
                      data-divider="true"
                      className="absolute top-0 bottom-0 left-1/2 -translate-x-1/2 w-0.5 border-l-2 border-dashed border-red-500"
                    />
                    {/* Delete button */}
                    <button
                      data-divider="true"
                      onClick={(e) => {
                        e.stopPropagation()
                        removeDivider(i)
                      }}
                      className="absolute top-2 left-1/2 -translate-x-1/2 w-4 h-4 bg-red-500 text-white rounded-full text-[10px] leading-none flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity z-20"
                      title="Linie entfernen"
                    >
                      x
                    </button>
                  </div>
                ))}
              </>
            )}
          </div>
        </div>

        {/* Right: Column type assignment + actions */}
        <div className="space-y-4">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
            Spaltentypen
          </div>

          {dividers.length === 0 ? (
            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-6 text-center">
              <div className="text-3xl mb-2">👆</div>
              <p className="text-sm text-gray-500 dark:text-gray-400">
                Klicken Sie auf das Bild links, um vertikale Trennlinien zwischen den Spalten zu setzen.
              </p>
              <p className="text-xs text-gray-400 dark:text-gray-500 mt-2">
                Linien koennen per Drag verschoben und per Hover geloescht werden.
              </p>
            </div>
          ) : (
            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
              <div className="text-sm text-gray-600 dark:text-gray-400">
                <span className="font-medium text-gray-800 dark:text-gray-200">
                  {dividers.length} Linien = {dividers.length + 1} Spalten
                </span>
              </div>
              <div className="grid gap-2">
                {columnRegions.map((region, i) => (
                  <div key={i} className="flex items-center gap-3">
                    <span className={`w-16 text-center px-2 py-0.5 rounded text-xs font-medium ${TYPE_BADGE_COLORS[region.type] || 'bg-gray-100 text-gray-600'}`}>
                      Spalte {i + 1}
                    </span>
                    <select
                      value={columnTypes[i] || 'column_text'}
                      onChange={(e) => updateColumnType(i, e.target.value as ColumnTypeKey)}
                      className="text-sm border border-gray-200 dark:border-gray-600 rounded px-2 py-1 bg-white dark:bg-gray-700 text-gray-800 dark:text-gray-200"
                    >
                      {COLUMN_TYPES.map(t => (
                        <option key={t.value} value={t.value}>{t.label}</option>
                      ))}
                    </select>
                    <span className="text-xs text-gray-400 font-mono">
                      {Math.round(region.rightPct - region.leftPct)}%
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}

          {/* Action buttons */}
          <div className="flex flex-col gap-2">
            <button
              onClick={handleApply}
              disabled={dividers.length === 0 || applying}
              className="w-full px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed"
            >
              {applying
                ? 'Wird gespeichert...'
                : isGT
                  ? `${dividers.length + 1} Spalten als Ground Truth speichern`
                  : `${dividers.length + 1} Spalten uebernehmen`}
            </button>
            <button
              onClick={() => setDividers([])}
              disabled={dividers.length === 0}
              className="text-xs px-3 py-2 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 disabled:opacity-50"
            >
              Alle Linien entfernen
            </button>
          </div>
        </div>
      </div>
    </div>
  )
}
|
||||
115
admin-lehrer/components/ocr-pipeline/PipelineStepper.tsx
Normal file
115
admin-lehrer/components/ocr-pipeline/PipelineStepper.tsx
Normal file
@@ -0,0 +1,115 @@
|
||||
'use client'
|
||||
|
||||
import { PipelineStep, DocumentTypeResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Display label per document type, used when the type is shown read-only. */
const DOC_TYPE_LABELS: Record<string, string> = {
  vocab_table: 'Vokabeltabelle',
  full_text: 'Volltext',
  generic_table: 'Tabelle',
}

/** Props of the pipeline step indicator. */
interface PipelineStepperProps {
  steps: PipelineStep[]
  /** Index into `steps` of the step that is currently active. */
  currentStep: number
  onStepClick: (index: number) => void
  /** When provided, completed steps get a hover button that calls this with their index. */
  onReprocess?: (index: number) => void
  /** When set, a document-type badge is rendered below the steps. */
  docTypeResult?: DocumentTypeResult | null
  /** When provided, the badge becomes a dropdown reporting the chosen type. */
  onDocTypeChange?: (docType: DocumentTypeResult['doc_type']) => void
}
|
||||
|
||||
/**
 * Horizontal step indicator for the OCR pipeline with an optional
 * document-type badge underneath.
 *
 * A step can be clicked when it is at or before the current step, or already
 * completed, and is not skipped.
 */
export function PipelineStepper({
  steps,
  currentStep,
  onStepClick,
  onReprocess,
  docTypeResult,
  onDocTypeChange,
}: PipelineStepperProps) {
  // Colour of the connector drawn in front of a step
  const connectorTone = (skipped: boolean, index: number): string => {
    if (skipped) return 'bg-gray-200 dark:bg-gray-700 border-t border-dashed border-gray-400'
    return index <= currentStep ? 'bg-teal-400' : 'bg-gray-300 dark:bg-gray-600'
  }

  // Pill colours by precedence: skipped > active > completed > failed > pending
  const pillTone = (skipped: boolean, active: boolean, completed: boolean, failed: boolean): string => {
    if (skipped) return 'bg-gray-100 text-gray-400 dark:bg-gray-800 dark:text-gray-600 line-through'
    if (active) return 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
    if (completed) return 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
    if (failed) return 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
    return 'text-gray-400 dark:text-gray-500'
  }

  return (
    <div className="space-y-2">
      <div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
        {steps.map((step, index) => {
          const status = step.status
          const isActive = index === currentStep
          const isCompleted = status === 'completed'
          const isFailed = status === 'failed'
          const isSkipped = status === 'skipped'
          const isClickable = !isSkipped && (isCompleted || index <= currentStep)

          // Status glyph: dash for skipped, check mark, cross, otherwise the step's own icon
          let glyph = step.icon
          if (isSkipped) glyph = '-'
          else if (isCompleted) glyph = '\u2713'
          else if (isFailed) glyph = '\u2717'

          const cursorTone = isClickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'

          return (
            <div key={step.id} className="flex items-center">
              {index > 0 && (
                <div className={`h-0.5 w-8 mx-1 ${connectorTone(isSkipped, index)}`} />
              )}
              <div className="relative group">
                <button
                  onClick={() => {
                    if (isClickable) onStepClick(index)
                  }}
                  disabled={!isClickable}
                  className={`flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all ${pillTone(isSkipped, isActive, isCompleted, isFailed)} ${cursorTone}`}
                >
                  <span className="text-base">{glyph}</span>
                  <span className="hidden sm:inline">{step.name}</span>
                  <span className="sm:hidden">{index + 1}</span>
                </button>
                {/* Reprocess button — shown on completed steps on hover */}
                {isCompleted && onReprocess && (
                  <button
                    onClick={(e) => {
                      e.stopPropagation()
                      onReprocess(index)
                    }}
                    className="absolute -top-1 -right-1 w-4 h-4 bg-orange-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
                    title="Ab hier neu verarbeiten"
                  >
                    ↻
                  </button>
                )}
              </div>
            </div>
          )
        })}
      </div>

      {/* Document type badge */}
      {docTypeResult && (
        <div className="flex items-center gap-2 px-4 py-2 bg-blue-50 dark:bg-blue-900/20 rounded-lg border border-blue-200 dark:border-blue-800 text-sm">
          <span className="text-blue-600 dark:text-blue-400 font-medium">
            Dokumenttyp:
          </span>
          {onDocTypeChange ? (
            <select
              value={docTypeResult.doc_type}
              onChange={(e) => onDocTypeChange(e.target.value as DocumentTypeResult['doc_type'])}
              className="bg-white dark:bg-gray-800 border border-blue-300 dark:border-blue-700 rounded px-2 py-0.5 text-sm text-blue-700 dark:text-blue-300"
            >
              <option value="vocab_table">Vokabeltabelle</option>
              <option value="generic_table">Tabelle (generisch)</option>
              <option value="full_text">Volltext</option>
            </select>
          ) : (
            <span className="text-blue-700 dark:text-blue-300">
              {DOC_TYPE_LABELS[docTypeResult.doc_type] || docTypeResult.doc_type}
            </span>
          )}
          <span className="text-blue-400 dark:text-blue-500 text-xs">
            ({Math.round(docTypeResult.confidence * 100)}% Konfidenz)
          </span>
        </div>
      )}
    </div>
  )
}
|
||||
432
admin-lehrer/components/ocr-pipeline/StepColumnDetection.tsx
Normal file
432
admin-lehrer/components/ocr-pipeline/StepColumnDetection.tsx
Normal file
@@ -0,0 +1,432 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { ColumnResult, ColumnGroundTruth, PageRegion, SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { ColumnControls } from './ColumnControls'
|
||||
import { ManualColumnEditor } from './ManualColumnEditor'
|
||||
import type { ColumnTypeKey } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Path prefix prepended to every OCR-pipeline request issued by this component. */
const KLAUSUR_API = '/klausur-api'

/**
 * Which panel the step renders:
 * - 'normal': overlay next to the plain image, plus controls
 * - 'ground-truth': read-only auto result next to the ground-truth editor
 * - 'manual': manual column editor only
 */
type ViewMode =
  | 'normal'
  | 'ground-truth'
  | 'manual'

/** Props accepted by StepColumnDetection. */
interface StepColumnDetectionProps {
  /** Session to operate on; with `null` the step only renders a placeholder. */
  sessionId: string | null
  /** Invoked to move on to the following pipeline step. */
  onNext: () => void
  /** Optional; receives the sub-sessions that were found on load or freshly created. */
  onBoxSessionsCreated?: (subSessions: SubSession[]) => void
}
|
||||
|
||||
/**
 * Derive the ManualColumnEditor seed state from a list of column regions.
 *
 * Regions are ordered left-to-right by `x` (the input array is not mutated).
 * Every region except the left-most contributes one divider, expressed as its
 * `x` position in percent of `imageWidth`; `columnTypes` lists the region types
 * in that same left-to-right order.
 *
 * Returns empty arrays when there are no regions or `imageWidth` is falsy.
 */
function columnsToEditorState(
  columns: PageRegion[],
  imageWidth: number
): { dividers: number[]; columnTypes: ColumnTypeKey[] } {
  if (columns.length === 0 || !imageWidth) {
    return { dividers: [], columnTypes: [] }
  }

  const leftToRight = columns.slice().sort((left, right) => left.x - right.x)

  // The first region starts at the page edge, so only the remaining ones yield a divider.
  const dividers: number[] = leftToRight
    .slice(1)
    .map(region => (region.x / imageWidth) * 100)
  const columnTypes: ColumnTypeKey[] = leftToRight.map(region => region.type)

  return { dividers, columnTypes }
}
|
||||
|
||||
/**
 * Column-detection step of the OCR pipeline.
 *
 * On mount (per `sessionId`) the session is fetched. A cached `column_result`
 * is reused; otherwise detection is triggered via POST `/columns`. Sessions that
 * report a `parent_session_id` (sub-sessions) call `onNext` automatically once
 * they have a column result. Independently, a previously saved column ground
 * truth is loaded if the backend has one.
 *
 * Three view modes are offered: the normal overlay/plain comparison, a manual
 * column editor (result replaces the column result), and a ground-truth editor
 * (result is stored as ground truth only).
 *
 * @param sessionId            Session to work on; `null` renders a placeholder.
 * @param onNext               Called to advance to the next pipeline step.
 * @param onBoxSessionsCreated Optional; receives existing or newly created sub-sessions.
 */
export function StepColumnDetection({ sessionId, onNext, onBoxSessionsCreated }: StepColumnDetectionProps) {
  const [columnResult, setColumnResult] = useState<ColumnResult | null>(null)
  // Cache-busting token for the overlay image. It changes only when a new column
  // result is stored, so unrelated re-renders do not re-download the overlay.
  const [overlayVersion, setOverlayVersion] = useState(0)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [viewMode, setViewMode] = useState<ViewMode>('normal')
  const [applying, setApplying] = useState(false)
  const [imageDimensions, setImageDimensions] = useState<{ width: number; height: number } | null>(null)
  const [savedGtColumns, setSavedGtColumns] = useState<PageRegion[] | null>(null)
  const [creatingBoxSessions, setCreatingBoxSessions] = useState(false)
  const [existingSubSessions, setExistingSubSessions] = useState<SubSession[] | null>(null)
  const [isSubSession, setIsSubSession] = useState(false)

  // Store a column result and refresh the overlay cache-buster in one place.
  const applyColumnResult = useCallback((result: ColumnResult) => {
    setOverlayVersion(Date.now())
    setColumnResult(result)
  }, [])

  // Run (or re-run) automatic column detection on the backend.
  const runAutoDetection = useCallback(async () => {
    if (!sessionId) return
    setDetecting(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, {
        method: 'POST',
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Spaltenerkennung fehlgeschlagen')
      }
      const data: ColumnResult = await res.json()
      applyColumnResult(data)
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
  }, [sessionId, applyColumnResult])

  // Fetch session info (image dimensions) + check for cached column result
  useEffect(() => {
    if (!sessionId || imageDimensions) return

    const fetchSessionInfo = async () => {
      try {
        const infoRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (infoRes.ok) {
          const info = await infoRes.json()
          if (info.image_width && info.image_height) {
            setImageDimensions({ width: info.image_width, height: info.image_height })
          }
          const isSub = !!info.parent_session_id
          setIsSubSession(isSub)
          if (info.sub_sessions && info.sub_sessions.length > 0) {
            setExistingSubSessions(info.sub_sessions)
            onBoxSessionsCreated?.(info.sub_sessions)
          }
          if (info.column_result) {
            applyColumnResult(info.column_result)
            // Sub-session with pseudo-column already set → auto-advance
            if (isSub) onNext()
            return
          }
          // Sub-session without columns → auto-detect (creates pseudo-column)
          if (isSub) {
            const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, { method: 'POST' })
            if (res.ok) {
              const data: ColumnResult = await res.json()
              applyColumnResult(data)
              onNext()
              return
            }
          }
        }
      } catch (e) {
        console.error('Failed to fetch session info:', e)
      }

      // No cached result (or the lookup failed) - run auto-detection
      runAutoDetection()
    }

    fetchSessionInfo()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  // Load saved GT if exists
  useEffect(() => {
    if (!sessionId) return
    const fetchGt = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`)
        if (res.ok) {
          const data = await res.json()
          const corrected = data.columns_gt?.corrected_columns
          if (corrected) setSavedGtColumns(corrected)
        }
      } catch {
        // No saved GT - that's fine
      }
    }
    fetchGt()
  }, [sessionId])

  const handleRerun = useCallback(() => {
    runAutoDetection()
  }, [runAutoDetection])

  // Best-effort save of the "is the auto result correct" feedback; failures are
  // logged (including non-2xx responses) but never surfaced to the user.
  const handleGroundTruth = useCallback(async (gt: ColumnGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  // Persist manually drawn columns; on success they replace the column result.
  const handleManualApply = useCallback(async (columns: PageRegion[]) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ columns }),
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Manuelle Spalten konnten nicht gespeichert werden')
      }
      const data = await res.json()
      applyColumnResult({
        columns: data.columns,
        duration_seconds: data.duration_seconds ?? 0,
      })
      setViewMode('normal')
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
    } finally {
      setApplying(false)
    }
  }, [sessionId, applyColumnResult])

  // Persist corrected columns as ground truth. The editor is only closed and the
  // local copy only updated once the backend confirmed the save.
  const handleGtApply = useCallback(async (columns: PageRegion[]) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)
    try {
      const gt: ColumnGroundTruth = {
        is_correct: false,
        corrected_columns: columns,
      }
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Ground Truth konnte nicht gespeichert werden')
      }
      setSavedGtColumns(columns)
      setViewMode('normal')
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  // Count box zones from column result
  const boxZones = columnResult?.zones?.filter(z => z.zone_type === 'box') || []
  const boxCount = boxZones.length

  // Ask the backend to create one sub-session per box and report them upward.
  const createBoxSessions = useCallback(async () => {
    if (!sessionId) return
    setCreatingBoxSessions(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/create-box-sessions`, {
        method: 'POST',
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Box-Sessions konnten nicht erstellt werden')
      }
      const data = await res.json()
      const subs: SubSession[] = data.sub_sessions.map((s: { id: string; name?: string; box_index: number }) => ({
        id: s.id,
        name: s.name || `Box ${s.box_index + 1}`,
        box_index: s.box_index,
        current_step: 1,
        status: 'pending',
      }))
      setExistingSubSessions(subs)
      onBoxSessionsCreated?.(subs)
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler beim Erstellen der Box-Sessions')
    } finally {
      setCreatingBoxSessions(false)
    }
  }, [sessionId, onBoxSessionsCreated])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">📊</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 3: Spaltenerkennung
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 und 2 abschliessen.
        </p>
      </div>
    )
  }

  const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
  const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/columns-overlay`
  // Stable between renders; changes whenever a new column result was stored.
  const overlaySrc = `${overlayUrl}?t=${overlayVersion}`

  // Pre-compute editor state from saved GT or auto columns for GT mode
  const gtInitial = savedGtColumns
    ? columnsToEditorState(savedGtColumns, imageDimensions?.width ?? 1000)
    : undefined

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Spaltenerkennung laeuft...
        </div>
      )}

      {viewMode === 'manual' ? (
        /* Manual column editor - overwrites column_result */
        <ManualColumnEditor
          imageUrl={dewarpedUrl}
          imageWidth={imageDimensions?.width ?? 1000}
          imageHeight={imageDimensions?.height ?? 1400}
          onApply={handleManualApply}
          onCancel={() => setViewMode('normal')}
          applying={applying}
          mode="manual"
        />
      ) : viewMode === 'ground-truth' ? (
        /* GT mode: auto result (left, readonly) + GT editor (right) */
        <div className="grid grid-cols-2 gap-4">
          {/* Left: Auto result (readonly overlay) */}
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Auto-Ergebnis (readonly)
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {columnResult ? (
                // eslint-disable-next-line @next/next/no-img-element
                <img
                  src={overlaySrc}
                  alt="Auto Spalten-Overlay"
                  className="w-full h-auto"
                />
              ) : (
                <div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
                  Keine Auto-Daten
                </div>
              )}
            </div>
            {/* Auto column list */}
            {columnResult && (
              <div className="mt-2 space-y-1">
                <div className="text-xs font-medium text-gray-500 dark:text-gray-400">
                  Auto: {columnResult.columns.length} Spalten
                </div>
                {columnResult.columns
                  .filter(c => c.type.startsWith('column') || c.type === 'page_ref')
                  .map((col, i) => (
                    <div key={i} className="text-xs text-gray-500 dark:text-gray-400 font-mono">
                      {i + 1}. {col.type} x={col.x} w={col.width}
                    </div>
                  ))}
              </div>
            )}
          </div>

          {/* Right: GT editor */}
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Ground Truth Editor
            </div>
            <ManualColumnEditor
              imageUrl={dewarpedUrl}
              imageWidth={imageDimensions?.width ?? 1000}
              imageHeight={imageDimensions?.height ?? 1400}
              onApply={handleGtApply}
              onCancel={() => setViewMode('normal')}
              applying={applying}
              mode="ground-truth"
              layout="stacked"
              initialDividers={gtInitial?.dividers}
              initialColumnTypes={gtInitial?.columnTypes}
            />
          </div>
        </div>
      ) : (
        /* Normal mode: overlay (left) vs clean (right) */
        <div className="grid grid-cols-2 gap-4">
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Mit Spalten-Overlay
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {columnResult ? (
                // eslint-disable-next-line @next/next/no-img-element
                <img
                  src={overlaySrc}
                  alt="Spalten-Overlay"
                  className="w-full h-auto"
                />
              ) : (
                <div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
                  {detecting ? 'Erkenne Spalten...' : 'Keine Daten'}
                </div>
              )}
            </div>
          </div>
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Entzerrtes Bild
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {/* eslint-disable-next-line @next/next/no-img-element */}
              <img
                src={dewarpedUrl}
                alt="Entzerrt"
                className="w-full h-auto"
              />
            </div>
          </div>
        </div>
      )}

      {/* Box zone info */}
      {viewMode === 'normal' && boxCount > 0 && (
        <div className="bg-amber-50 dark:bg-amber-900/20 border border-amber-200 dark:border-amber-700 rounded-xl p-4 flex items-center justify-between">
          <div className="flex items-center gap-3">
            <span className="text-2xl">📦</span>
            <div>
              <div className="text-sm font-medium text-amber-800 dark:text-amber-300">
                {boxCount} Box{boxCount > 1 ? 'en' : ''} erkannt
              </div>
              <div className="text-xs text-amber-600 dark:text-amber-400">
                Box-Bereiche werden separat verarbeitet
              </div>
            </div>
          </div>
          {existingSubSessions && existingSubSessions.length > 0 ? (
            <div className="text-xs text-amber-700 dark:text-amber-300 font-medium">
              {existingSubSessions.length} Box-Session{existingSubSessions.length > 1 ? 's' : ''} vorhanden
            </div>
          ) : (
            <button
              onClick={createBoxSessions}
              disabled={creatingBoxSessions}
              className="px-4 py-2 bg-amber-600 text-white rounded-lg hover:bg-amber-700 transition-colors text-sm font-medium disabled:opacity-50 flex items-center gap-2"
            >
              {creatingBoxSessions && (
                <div className="animate-spin w-3.5 h-3.5 border-2 border-white border-t-transparent rounded-full" />
              )}
              Box-Sessions erstellen
            </button>
          )}
        </div>
      )}

      {/* Controls */}
      {viewMode === 'normal' && (
        <ColumnControls
          columnResult={columnResult}
          onRerun={handleRerun}
          onManualMode={() => setViewMode('manual')}
          onGtMode={() => setViewMode('ground-truth')}
          onGroundTruth={handleGroundTruth}
          onNext={onNext}
          isDetecting={detecting}
          savedGtColumns={savedGtColumns}
        />
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
19
admin-lehrer/components/ocr-pipeline/StepCoordinates.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepCoordinates.tsx
Normal file
@@ -0,0 +1,19 @@
|
||||
'use client'
|
||||
|
||||
/**
 * Static placeholder for pipeline step 5 (coordinate assignment).
 * Takes no props, holds no state and performs no requests; it only renders a
 * "coming soon" notice.
 */
export function StepCoordinates() {
  const wrapperClasses = 'flex flex-col items-center justify-center py-16 text-center'
  const headingClasses = 'text-lg font-medium text-gray-700 dark:text-gray-300 mb-2'
  const textClasses = 'text-gray-500 dark:text-gray-400 max-w-md'
  const badgeClasses =
    'mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium'

  return (
    <div className={wrapperClasses}>
      <div className="text-5xl mb-4">📍</div>
      <h3 className={headingClasses}>
        Schritt 5: Koordinatenzuweisung
      </h3>
      <p className={textClasses}>
        Exakte Positionszuweisung fuer jedes Wort auf der Seite.
        Dieser Schritt wird in einer zukuenftigen Version implementiert.
      </p>
      <div className={badgeClasses}>
        Kommt bald
      </div>
    </div>
  )
}
|
||||
200
admin-lehrer/components/ocr-pipeline/StepCrop.tsx
Normal file
200
admin-lehrer/components/ocr-pipeline/StepCrop.tsx
Normal file
@@ -0,0 +1,200 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { CropResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
/** Path prefix prepended to every OCR-pipeline request issued by this component. */
const KLAUSUR_API = '/klausur-api'

/** Props accepted by StepCrop. */
interface StepCropProps {
  /** Session to crop; with `null` the step only renders a hint. */
  sessionId: string | null
  /** Invoked to move on to the following pipeline step. */
  onNext: () => void
}
|
||||
|
||||
/**
 * Build the message for a failed crop request: the backend's `detail` string
 * when the body is JSON and carries one, otherwise `fallback`.
 */
async function cropErrorMessage(res: Response, fallback: string): Promise<string> {
  const payload = await res.json().catch(() => null)
  const detail = payload?.detail
  return typeof detail === 'string' && detail ? detail : fallback
}

/**
 * Crop step of the OCR pipeline.
 *
 * Once per mounted component the session is fetched; a stored `crop_result` is
 * reused, otherwise cropping is triggered via POST `/crop`. The result is shown
 * as a before/after comparison plus a summary row (multi-page info, detected
 * format, sizes, border fractions, duration).
 *
 * @param sessionId Session to crop; `null` renders a hint only.
 * @param onNext    Called to advance to the next pipeline step.
 */
export function StepCrop({ sessionId, onNext }: StepCropProps) {
  const [cropResult, setCropResult] = useState<CropResult | null>(null)
  const [cropping, setCropping] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [hasRun, setHasRun] = useState(false)

  // Auto-trigger crop on mount
  useEffect(() => {
    if (!sessionId || hasRun) return
    setHasRun(true)

    const runCrop = async () => {
      setCropping(true)
      setError(null)

      try {
        // Check if session already has crop result
        const sessionRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (sessionRes.ok) {
          const sessionData = await sessionRes.json()
          if (sessionData.crop_result) {
            setCropResult(sessionData.crop_result)
            // The spinner is cleared by the finally block below.
            return
          }
        }

        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/crop`, {
          method: 'POST',
        })

        if (!res.ok) {
          // Surface the backend's explanation instead of a generic message.
          throw new Error(await cropErrorMessage(res, 'Zuschnitt fehlgeschlagen'))
        }

        const data = await res.json()
        setCropResult(data)
      } catch (e) {
        setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        setCropping(false)
      }
    }

    runCrop()
  }, [sessionId, hasRun])

  // Best-effort skip: the pipeline advances even when the skip request fails,
  // but the failure is logged rather than silently dropped.
  const handleSkip = async () => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/crop/skip`, {
        method: 'POST',
      })
      if (res.ok) {
        const data = await res.json()
        setCropResult(data)
      } else {
        console.error('Skip crop failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Skip crop failed:', e)
    }
    onNext()
  }

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
  // The cropped image is only requested once a crop result exists.
  const croppedUrl = cropResult
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
    : null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {cropping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Scannerraender werden erkannt...
        </div>
      )}

      {/* Image comparison */}
      <ImageCompareView
        originalUrl={dewarpedUrl}
        deskewedUrl={croppedUrl}
        showGrid={false}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Entzerrt"
        rightLabel="Zugeschnitten"
      />

      {/* Crop result info */}
      {cropResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex flex-wrap items-center gap-3 text-sm">
            {(cropResult as Record<string, unknown>).multi_page ? (
              <>
                <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-400 text-xs font-medium">
                  Mehrseitig: {(cropResult as Record<string, unknown>).page_count as number} Seiten erkannt
                </span>
                {((cropResult as Record<string, unknown>).sub_sessions as Array<{id: string; name: string; page_index: number}> | undefined)?.map((sub) => (
                  <span key={sub.id} className="text-gray-400 text-xs">
                    Seite {sub.page_index + 1}
                  </span>
                ))}
              </>
            ) : cropResult.crop_applied ? (
              <>
                <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
                  Zugeschnitten
                </span>
                {cropResult.detected_format && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-600 dark:text-gray-400">
                      Format: <span className="font-medium">{cropResult.detected_format}</span>
                      {cropResult.format_confidence != null && (
                        <span className="text-gray-400 ml-1">
                          ({Math.round(cropResult.format_confidence * 100)}%)
                        </span>
                      )}
                    </span>
                  </>
                )}
                {cropResult.original_size && cropResult.cropped_size && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-400 text-xs">
                      {cropResult.original_size.width}x{cropResult.original_size.height} → {cropResult.cropped_size.width}x{cropResult.cropped_size.height}
                    </span>
                  </>
                )}
                {cropResult.border_fractions && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-400 text-xs">
                      Raender: O={pct(cropResult.border_fractions.top)} U={pct(cropResult.border_fractions.bottom)} L={pct(cropResult.border_fractions.left)} R={pct(cropResult.border_fractions.right)}
                    </span>
                  </>
                )}
              </>
            ) : (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-400 text-xs font-medium">
                Kein Zuschnitt noetig
              </span>
            )}
            {cropResult.duration_seconds != null && (
              <span className="text-gray-400 text-xs ml-auto">
                {cropResult.duration_seconds}s
              </span>
            )}
          </div>
        </div>
      )}

      {/* Action buttons */}
      {cropResult && (
        <div className="flex justify-between">
          <button
            onClick={handleSkip}
            className="px-4 py-2 text-sm text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 transition-colors"
          >
            Ueberspringen
          </button>
          <button
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter →
          </button>
        </div>
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
|
||||
/**
 * Format a fraction as a percentage string with exactly one decimal place,
 * e.g. 0.125 becomes "12.5%".
 */
function pct(v: number): string {
  const percent = v * 100
  return percent.toFixed(1) + '%'
}
|
||||
183
admin-lehrer/components/ocr-pipeline/StepDeskew.tsx
Normal file
183
admin-lehrer/components/ocr-pipeline/StepDeskew.tsx
Normal file
@@ -0,0 +1,183 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { DeskewGroundTruth, DeskewResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { DeskewControls } from './DeskewControls'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
/** Path prefix prepended to every OCR-pipeline request issued by this component. */
const KLAUSUR_API = '/klausur-api'

/** Props accepted by StepDeskew. */
interface StepDeskewProps {
  /** Session to deskew; with `null` the step only renders a hint. */
  sessionId: string | null
  /** Invoked to move on to the following pipeline step. */
  onNext: () => void
}
|
||||
|
||||
/**
 * Build the message for a failed deskew-related request: the backend's
 * `detail` string when the body is JSON and carries one, otherwise `fallback`.
 */
async function deskewErrorMessage(res: Response, fallback: string): Promise<string> {
  const payload = await res.json().catch(() => null)
  const detail = payload?.detail
  return typeof detail === 'string' && detail ? detail : fallback
}

/**
 * Deskew step of the OCR pipeline.
 *
 * Loads the session, reuses a stored `deskew_result` if there is one and
 * otherwise triggers deskewing via POST `/deskew`. The oriented image is shown
 * as the "before" view next to the deskewed (optionally binarized) image, and
 * the controls allow a manual angle and ground-truth feedback.
 *
 * @param sessionId Session to deskew; `null` renders a hint only.
 * @param onNext    Called to advance to the next pipeline step.
 */
export function StepDeskew({ sessionId, onNext }: StepDeskewProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
  const [deskewing, setDeskewing] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showBinarized, setShowBinarized] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [hasAutoRun, setHasAutoRun] = useState(false)

  // Load session and auto-trigger deskew
  useEffect(() => {
    if (!sessionId || session) return

    const loadAndDeskew = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok) {
          // Report the failure; returning silently would leave the step blank.
          throw new Error(await deskewErrorMessage(res, 'Session konnte nicht geladen werden'))
        }
        const data = await res.json()

        const sessionInfo: SessionInfo = {
          session_id: data.session_id,
          filename: data.filename,
          image_width: data.image_width,
          image_height: data.image_height,
          // Use oriented image as "before" view (deskew runs right after orientation)
          original_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/oriented`,
        }
        setSession(sessionInfo)

        // If deskew result already exists, use it
        if (data.deskew_result) {
          const dr: DeskewResult = {
            ...data.deskew_result,
            deskewed_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/deskewed`,
            binarized_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/binarized`,
          }
          setDeskewResult(dr)
          return
        }

        // Auto-trigger deskew if not already done
        if (!hasAutoRun) {
          setHasAutoRun(true)
          setDeskewing(true)
          const deskewRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/deskew`, {
            method: 'POST',
          })

          if (!deskewRes.ok) {
            throw new Error(await deskewErrorMessage(deskewRes, 'Begradigung fehlgeschlagen'))
          }

          const deskewData: DeskewResult = await deskewRes.json()
          // The response carries API-relative image paths; prefix them for the browser.
          deskewData.deskewed_image_url = `${KLAUSUR_API}${deskewData.deskewed_image_url}`
          deskewData.binarized_image_url = `${KLAUSUR_API}${deskewData.binarized_image_url}`
          setDeskewResult(deskewData)
        }
      } catch (e) {
        setError(e instanceof Error ? e.message : 'Fehler beim Laden')
      } finally {
        setDeskewing(false)
      }
    }

    loadAndDeskew()
  }, [sessionId, session, hasAutoRun])

  // Apply a user-chosen rotation angle and refresh the deskewed image.
  const handleManualDeskew = useCallback(async (angle: number) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/deskew/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ angle }),
      })

      if (!res.ok) {
        throw new Error(await deskewErrorMessage(res, 'Manuelle Begradigung fehlgeschlagen'))
      }

      const data = await res.json()
      setDeskewResult((prev) =>
        prev
          ? {
              ...prev,
              angle_applied: data.angle_applied,
              method_used: data.method_used,
              // Timestamp forces the browser to reload the re-rendered image.
              deskewed_image_url: `${KLAUSUR_API}${data.deskewed_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  // Best-effort save of ground-truth feedback; failures are logged (including
  // non-2xx responses) but never surfaced to the user.
  const handleGroundTruth = useCallback(async (gt: DeskewGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/deskew`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  return (
    <div className="space-y-4">
      {/* Filename */}
      {session && (
        <div className="text-sm text-gray-500 dark:text-gray-400">
          Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
          {' '}({session.image_width} x {session.image_height} px)
        </div>
      )}

      {/* Loading indicator */}
      {deskewing && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Begradigung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison */}
      {session && (
        <ImageCompareView
          originalUrl={session.original_image_url}
          deskewedUrl={deskewResult?.deskewed_image_url ?? null}
          showGrid={showGrid}
          showBinarized={showBinarized}
          binarizedUrl={deskewResult?.binarized_image_url ?? null}
          leftLabel="Orientiert"
          rightLabel="Begradigt"
        />
      )}

      {/* Controls */}
      <DeskewControls
        deskewResult={deskewResult}
        showBinarized={showBinarized}
        onToggleBinarized={() => setShowBinarized((v) => !v)}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDeskew={handleManualDeskew}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
204
admin-lehrer/components/ocr-pipeline/StepDewarp.tsx
Normal file
204
admin-lehrer/components/ocr-pipeline/StepDewarp.tsx
Normal file
@@ -0,0 +1,204 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { DeskewResult, DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { DewarpControls } from './DewarpControls'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepDewarpProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/**
 * Pull a readable message out of a failed API response.
 *
 * Tries to parse the body as JSON and use its string `detail` field; falls back to the
 * HTTP status text when the body is not JSON, and to `fallback` when neither yields a
 * non-empty string (e.g. `detail` is a structured validation-error list).
 */
async function readErrorDetail(res: Response, fallback: string): Promise<string> {
  const body = await res.json().catch(() => ({ detail: res.statusText }))
  const detail = body?.detail
  return typeof detail === 'string' && detail ? detail : fallback
}

/**
 * Pipeline step 2: dewarp the deskewed page.
 *
 * On mount (and whenever `sessionId` changes) the component
 *  1. loads the session to obtain `deskew_result` for the fine-tuning controls, and
 *  2. POSTs to the `/dewarp` endpoint to run the automatic dewarp.
 * The user can then apply a manual shear or a combined rotation + shear correction and
 * submit ground-truth feedback through `DewarpControls`.
 *
 * @param sessionId Pipeline session id; when `null` only a placeholder is rendered.
 * @param onNext    Forwarded to `DewarpControls` as its `onNext` handler.
 */
export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
  const [dewarpResult, setDewarpResult] = useState<DewarpResult | null>(null)
  const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
  const [dewarping, setDewarping] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)

  // Load session info to get deskew_result (for fine-tuning init values)
  useEffect(() => {
    // Drop the previous session's value so it cannot leak into the new session.
    setDeskewResult(null)
    if (!sessionId) return

    // Set by the cleanup so a response for a superseded session is ignored.
    let cancelled = false
    const loadSession = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok) return
        const data = await res.json()
        if (!cancelled && data.deskew_result) {
          setDeskewResult(data.deskew_result)
        }
      } catch (e) {
        console.error('Failed to load session info:', e)
      }
    }
    loadSession()

    return () => {
      cancelled = true
    }
  }, [sessionId])

  // Auto-trigger dewarp once per session id
  useEffect(() => {
    // Reset per-session state first; otherwise a result from the previous session would
    // stay on screen and the new session would never be dewarped.
    setDewarpResult(null)
    setError(null)
    setDewarping(false)
    if (!sessionId) return

    let cancelled = false
    const runDewarp = async () => {
      setDewarping(true)
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp`, {
          method: 'POST',
        })
        if (!res.ok) {
          throw new Error(await readErrorDetail(res, 'Entzerrung fehlgeschlagen'))
        }
        const data: DewarpResult = await res.json()
        if (cancelled) return
        // Build a new object instead of mutating the parsed response.
        setDewarpResult({
          ...data,
          dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}`,
        })
      } catch (e) {
        if (!cancelled) setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        // A superseded run must not clear the spinner of the run that replaced it.
        if (!cancelled) setDewarping(false)
      }
    }
    runDewarp()

    return () => {
      cancelled = true
    }
  }, [sessionId])

  /**
   * Shared implementation of the manual / combined corrections: POST `payload` as JSON to
   * `sessions/{id}/{endpoint}` and merge `method_used`, `shear_degrees` and a cache-busted
   * image URL from the response into the current dewarp result.
   */
  const applyAdjustment = useCallback(
    async (endpoint: string, payload: Record<string, number>, fallbackMessage: string) => {
      if (!sessionId) return
      setApplying(true)
      setError(null)

      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(payload),
        })
        // Surface the server-provided detail, consistent with the auto-dewarp path.
        if (!res.ok) throw new Error(await readErrorDetail(res, fallbackMessage))

        const data = await res.json()
        setDewarpResult((prev) =>
          prev
            ? {
                ...prev,
                method_used: data.method_used,
                shear_degrees: data.shear_degrees,
                // Timestamp query param forces the browser to refetch the regenerated image.
                dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
              }
            : null,
        )
      } catch (e) {
        setError(e instanceof Error ? e.message : 'Fehler')
      } finally {
        setApplying(false)
      }
    },
    [sessionId],
  )

  /** Apply a manual shear correction (degrees). */
  const handleManualDewarp = useCallback(
    (shearDegrees: number) =>
      applyAdjustment('dewarp/manual', { shear_degrees: shearDegrees }, 'Manuelle Entzerrung fehlgeschlagen'),
    [applyAdjustment],
  )

  /** Apply rotation and shear (both in degrees) in a single request. */
  const handleCombinedAdjust = useCallback(
    (rotationDegrees: number, shearDegrees: number) =>
      applyAdjustment(
        'adjust-combined',
        { rotation_degrees: rotationDegrees, shear_degrees: shearDegrees },
        'Kombinierte Anpassung fehlgeschlagen',
      ),
    [applyAdjustment],
  )

  /** Best-effort submission of dewarp ground truth; failures are only logged. */
  const handleGroundTruth = useCallback(async (gt: DewarpGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/dewarp`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch() only rejects on network errors, so HTTP error statuses need an explicit check.
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">🔧</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 2: Entzerrung (Dewarp)
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 (Begradigung) abschliessen.
        </p>
      </div>
    )
  }

  const deskewedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/deskewed`
  const dewarpedUrl = dewarpResult?.dewarped_image_url ?? null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {dewarping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Entzerrung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison: deskewed (left) vs dewarped (right) */}
      <ImageCompareView
        originalUrl={deskewedUrl}
        deskewedUrl={dewarpedUrl}
        showGrid={showGrid}
        showGridLeft={showGrid}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel={`Begradigt (nach Deskew)${showGrid ? ' + Raster' : ''}`}
        rightLabel={`Entzerrt${showGrid ? ' + Raster (mm)' : ''}`}
      />

      {/* Controls */}
      <DewarpControls
        dewarpResult={dewarpResult}
        deskewResult={deskewResult}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDewarp={handleManualDewarp}
        onCombinedAdjust={handleCombinedAdjust}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
640
admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx
Normal file
640
admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx
Normal file
@@ -0,0 +1,640 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type {
|
||||
GridCell, ColumnMeta, ImageRegion, ImageStyle,
|
||||
} from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { IMAGE_STYLES as STYLES } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Path prefix prepended to every OCR-pipeline request and image URL built in this file. */
const KLAUSUR_API = '/klausur-api'

/**
 * Hex colour per column-type key.
 * NOTE(review): not referenced by the StepGroundTruth component below — confirm whether
 * it is still needed.
 */
const COL_TYPE_COLORS: Record<string, string> = {
  column_en: '#3b82f6',
  column_de: '#22c55e',
  column_example: '#f97316',
  column_text: '#a855f7',
  page_ref: '#06b6d4',
  column_marker: '#6b7280',
}
|
||||
|
||||
/** Props accepted by {@link StepGroundTruth}. */
interface StepGroundTruthProps {
  /** Pipeline session id used to build API URLs; nothing is loaded while it is `null`. */
  sessionId: string | null
  /** Invoked from the "Abschliessen" button after the validation save has been attempted. */
  onNext: () => void
}
|
||||
|
||||
/** Subset of the session response that the validation view renders. */
interface SessionData {
  /** Grid cells taken from `word_result.cells` (empty array when absent). */
  cells: GridCell[]
  /** Column metadata taken from `word_result.columns_used` (empty array when absent). */
  columnsUsed: ColumnMeta[]
  /** Image width from `word_result` or the session, defaulting to 800. */
  imageWidth: number
  /** Image height from `word_result` or the session, defaulting to 600. */
  imageHeight: number
  /** URL of the original scan, already prefixed with the API base path. */
  originalImageUrl: string
}
|
||||
|
||||
/**
 * Validation step: shows the original scan next to a text reconstruction built from the
 * recognised grid cells, lets the user detect / draw image regions and generate images
 * for them, record a score and notes, and mark the session as ground truth.
 *
 * @param sessionId Pipeline session id; nothing is loaded while it is `null`.
 * @param onNext    Called from "Abschliessen", but only after the validation was saved
 *                  successfully.
 */
export function StepGroundTruth({ sessionId, onNext }: StepGroundTruthProps) {
  const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
  const [error, setError] = useState('')
  const [session, setSession] = useState<SessionData | null>(null)
  const [imageRegions, setImageRegions] = useState<(ImageRegion & { generating?: boolean })[]>([])
  const [detecting, setDetecting] = useState(false)
  const [zoom, setZoom] = useState(100)
  const [syncScroll, setSyncScroll] = useState(true)
  const [notes, setNotes] = useState('')
  const [score, setScore] = useState<number | null>(null)
  const [drawingRegion, setDrawingRegion] = useState(false)
  const [dragStart, setDragStart] = useState<{ x: number; y: number } | null>(null)
  const [dragEnd, setDragEnd] = useState<{ x: number; y: number } | null>(null)
  const [isGroundTruth, setIsGroundTruth] = useState(false)
  const [gtSaving, setGtSaving] = useState(false)
  const [gtMessage, setGtMessage] = useState('')

  const leftPanelRef = useRef<HTMLDivElement>(null)
  const rightPanelRef = useRef<HTMLDivElement>(null)
  const reconRef = useRef<HTMLDivElement>(null)
  const [reconWidth, setReconWidth] = useState(0)
  // Pending auto-clear timer for gtMessage, kept so it can be cancelled.
  const gtMessageTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null)

  // The reconstruction container only exists once the main view is rendered, i.e. when a
  // session is loaded AND the loading spinner is gone.
  const reconMounted = session !== null && status !== 'loading'

  // Track reconstruction container width for font size calculation.
  // Keyed on reconMounted: `session` is set while the spinner is still shown, so an effect
  // keyed on `session` alone would find reconRef empty and never attach the observer.
  useEffect(() => {
    const el = reconRef.current
    if (!el) return
    const obs = new ResizeObserver(entries => {
      for (const entry of entries) setReconWidth(entry.contentRect.width)
    })
    obs.observe(el)
    return () => obs.disconnect()
  }, [reconMounted])

  // Cancel a pending gtMessage auto-clear when the component unmounts.
  useEffect(() => {
    return () => {
      if (gtMessageTimerRef.current) clearTimeout(gtMessageTimerRef.current)
    }
  }, [])

  // Load session data
  useEffect(() => {
    if (!sessionId) return
    loadSessionData()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /** Fetch the session and any stored validation, then populate the view state. */
  const loadSessionData = async () => {
    if (!sessionId) return
    setStatus('loading')
    // Clear a stale error so it does not linger after a successful (re)load.
    setError('')
    try {
      const resp = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
      if (!resp.ok) throw new Error(`Failed to load session: ${resp.status}`)
      const data = await resp.json()

      const wordResult = data.word_result || {}
      setSession({
        cells: wordResult.cells || [],
        columnsUsed: wordResult.columns_used || [],
        imageWidth: wordResult.image_width || data.image_width || 800,
        imageHeight: wordResult.image_height || data.image_height || 600,
        originalImageUrl: data.original_image_url
          ? `${KLAUSUR_API}${data.original_image_url}`
          : `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/original`,
      })

      // Check if session has ground truth reference
      const gt = data.ground_truth
      setIsGroundTruth(!!gt?.build_grid_reference)

      // Load existing validation data. Values are reset when none is stored so that
      // regions / notes / score from a previously shown session are not carried over.
      const valResp = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validation`)
      const validation = valResp.ok ? (await valResp.json()).validation : null
      setImageRegions(validation?.image_regions || [])
      setNotes(validation?.notes || '')
      setScore(validation?.score ?? null)

      setStatus('ready')
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
      setStatus('error')
    }
  }

  // Sync scroll between panels
  const handleScroll = useCallback((source: 'left' | 'right') => {
    if (!syncScroll) return
    const from = source === 'left' ? leftPanelRef.current : rightPanelRef.current
    const to = source === 'left' ? rightPanelRef.current : leftPanelRef.current
    if (from && to) {
      to.scrollTop = from.scrollTop
      to.scrollLeft = from.scrollLeft
    }
  }, [syncScroll])

  // Detect images via VLM
  const handleDetectImages = async () => {
    if (!sessionId) return
    setDetecting(true)
    try {
      const resp = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/detect-images`,
        { method: 'POST' }
      )
      if (!resp.ok) throw new Error(`Detection failed: ${resp.status}`)
      const data = await resp.json()
      setImageRegions(data.regions || [])
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setDetecting(false)
    }
  }

  // Generate image for a region
  const handleGenerateImage = async (index: number) => {
    if (!sessionId) return
    const region = imageRegions[index]
    if (!region) return

    setImageRegions(prev => prev.map((r, i) => i === index ? { ...r, generating: true } : r))

    try {
      const resp = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/generate-image`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            region_index: index,
            prompt: region.prompt,
            style: region.style,
          }),
        }
      )
      if (!resp.ok) throw new Error(`Generation failed: ${resp.status}`)
      const data = await resp.json()

      // NOTE(review): regions are addressed by index; removing or re-detecting regions
      // while a generation is in flight would attach the image to the wrong entry.
      setImageRegions(prev => prev.map((r, i) =>
        i === index ? { ...r, image_b64: data.image_b64, generating: false } : r
      ))
    } catch (e) {
      setImageRegions(prev => prev.map((r, i) => i === index ? { ...r, generating: false } : r))
      setError(e instanceof Error ? e.message : String(e))
    }
  }

  /**
   * Save notes and score for this session.
   *
   * @returns `true` when the server accepted the validation, `false` otherwise (the
   *          error message is shown in the banner).
   */
  const handleSave = async (): Promise<boolean> => {
    if (!sessionId) {
      setError('Keine Session-ID vorhanden')
      return false
    }
    setStatus('saving')
    setError('')
    try {
      const resp = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validate`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ notes, score: score ?? 0 }),
        }
      )
      if (!resp.ok) {
        const body = await resp.text().catch(() => '')
        throw new Error(`Speichern fehlgeschlagen (${resp.status}): ${body}`)
      }
      setStatus('saved')
      return true
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
      setStatus('ready')
      return false
    }
  }

  // Mark/update ground truth reference
  const handleMarkGroundTruth = async () => {
    if (!sessionId) return
    // A timer left over from an earlier success must not wipe the message set below.
    if (gtMessageTimerRef.current) {
      clearTimeout(gtMessageTimerRef.current)
      gtMessageTimerRef.current = null
    }
    setGtSaving(true)
    setGtMessage('')
    try {
      const resp = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth?pipeline=ocr-pipeline`,
        { method: 'POST' }
      )
      if (!resp.ok) {
        const body = await resp.text().catch(() => '')
        throw new Error(`Ground Truth fehlgeschlagen (${resp.status}): ${body}`)
      }
      const data = await resp.json()
      setIsGroundTruth(true)
      setGtMessage(`Ground Truth gespeichert (${data.cells_saved} Zellen)`)
      gtMessageTimerRef.current = setTimeout(() => {
        gtMessageTimerRef.current = null
        setGtMessage('')
      }, 5000)
    } catch (e) {
      setGtMessage(e instanceof Error ? e.message : String(e))
    } finally {
      setGtSaving(false)
    }
  }

  // Handle manual region drawing on reconstruction (coordinates are percentages of the container)
  const handleReconMouseDown = (e: React.MouseEvent<HTMLDivElement>) => {
    if (!drawingRegion) return
    const rect = e.currentTarget.getBoundingClientRect()
    const x = ((e.clientX - rect.left) / rect.width) * 100
    const y = ((e.clientY - rect.top) / rect.height) * 100
    setDragStart({ x, y })
    setDragEnd({ x, y })
  }

  const handleReconMouseMove = (e: React.MouseEvent<HTMLDivElement>) => {
    if (!dragStart) return
    const rect = e.currentTarget.getBoundingClientRect()
    const x = ((e.clientX - rect.left) / rect.width) * 100
    const y = ((e.clientY - rect.top) / rect.height) * 100
    setDragEnd({ x, y })
  }

  const handleReconMouseUp = () => {
    if (!dragStart || !dragEnd) return
    const x = Math.min(dragStart.x, dragEnd.x)
    const y = Math.min(dragStart.y, dragEnd.y)
    const w = Math.abs(dragEnd.x - dragStart.x)
    const h = Math.abs(dragEnd.y - dragStart.y)

    // Ignore accidental clicks: only rectangles larger than 2% x 2% become regions.
    if (w > 2 && h > 2) {
      setImageRegions(prev => [...prev, {
        bbox_pct: { x, y, w, h },
        prompt: '',
        description: 'Manually selected region',
        image_b64: null,
        style: 'educational' as ImageStyle,
      }])
    }

    setDragStart(null)
    setDragEnd(null)
    setDrawingRegion(false)
  }

  const handleRemoveRegion = (index: number) => {
    setImageRegions(prev => prev.filter((_, i) => i !== index))
  }

  if (status === 'loading') {
    return (
      <div className="flex items-center justify-center py-16">
        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-teal-500 mr-3" />
        <span className="text-gray-500 dark:text-gray-400">Session wird geladen...</span>
      </div>
    )
  }

  if (status === 'error' && !session) {
    return (
      <div className="text-center py-16">
        <p className="text-red-500">{error}</p>
        <button onClick={loadSessionData} className="mt-4 px-4 py-2 bg-teal-600 text-white rounded hover:bg-teal-700">
          Erneut laden
        </button>
      </div>
    )
  }

  if (!session) return null

  const aspect = session.imageHeight / session.imageWidth

  return (
    <div className="space-y-4">
      {/* Header / Controls */}
      <div className="flex items-center justify-between flex-wrap gap-2">
        <h3 className="text-lg font-medium text-gray-800 dark:text-gray-200">
          Validierung — Original vs. Rekonstruktion
        </h3>
        <div className="flex items-center gap-3">
          <button
            onClick={handleDetectImages}
            disabled={detecting}
            className="px-3 py-1.5 text-sm bg-indigo-600 text-white rounded hover:bg-indigo-700 disabled:opacity-50"
          >
            {detecting ? 'Erkennung laeuft...' : 'Bilder erkennen'}
          </button>
          <label className="flex items-center gap-1.5 text-sm text-gray-600 dark:text-gray-400">
            <input
              type="checkbox"
              checked={syncScroll}
              onChange={e => setSyncScroll(e.target.checked)}
              className="rounded"
            />
            Sync Scroll
          </label>
          <div className="flex items-center gap-1.5">
            <button onClick={() => setZoom(z => Math.max(50, z - 25))} className="px-2 py-1 text-sm border rounded dark:border-gray-600 hover:bg-gray-100 dark:hover:bg-gray-700">-</button>
            <span className="text-sm text-gray-600 dark:text-gray-400 w-12 text-center">{zoom}%</span>
            <button onClick={() => setZoom(z => Math.min(200, z + 25))} className="px-2 py-1 text-sm border rounded dark:border-gray-600 hover:bg-gray-100 dark:hover:bg-gray-700">+</button>
          </div>
        </div>
      </div>

      {error && (
        <div className="p-2 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 text-sm rounded">
          {error}
          <button onClick={() => setError('')} className="ml-2 underline">Schliessen</button>
        </div>
      )}

      {/* Side-by-side panels */}
      <div className="grid grid-cols-2 gap-4" style={{ height: 'calc(100vh - 580px)', minHeight: 300 }}>
        {/* Left: Original */}
        <div className="border rounded-lg dark:border-gray-700 overflow-hidden flex flex-col">
          <div className="px-3 py-1.5 bg-gray-50 dark:bg-gray-800 text-sm font-medium text-gray-600 dark:text-gray-400 border-b dark:border-gray-700">
            Original
          </div>
          <div
            ref={leftPanelRef}
            className="flex-1 overflow-auto"
            onScroll={() => handleScroll('left')}
          >
            <div style={{ width: `${zoom}%`, minWidth: '100%' }}>
              <img
                src={session.originalImageUrl}
                alt="Original"
                className="w-full h-auto"
                draggable={false}
              />
            </div>
          </div>
        </div>

        {/* Right: Reconstruction */}
        <div className="border rounded-lg dark:border-gray-700 overflow-hidden flex flex-col">
          <div className="px-3 py-1.5 bg-gray-50 dark:bg-gray-800 text-sm font-medium text-gray-600 dark:text-gray-400 border-b dark:border-gray-700 flex items-center justify-between">
            <span>Rekonstruktion</span>
            <button
              onClick={() => setDrawingRegion(!drawingRegion)}
              className={`text-xs px-2 py-0.5 rounded ${drawingRegion ? 'bg-indigo-600 text-white' : 'bg-gray-200 dark:bg-gray-700 text-gray-600 dark:text-gray-400'}`}
            >
              {drawingRegion ? 'Region zeichnen...' : '+ Region'}
            </button>
          </div>
          <div
            ref={rightPanelRef}
            className="flex-1 overflow-auto"
            onScroll={() => handleScroll('right')}
          >
            <div style={{ width: `${zoom}%`, minWidth: '100%' }}>
              {/* Reconstruction container */}
              <div
                ref={reconRef}
                className="relative bg-white"
                style={{
                  paddingBottom: `${aspect * 100}%`,
                  cursor: drawingRegion ? 'crosshair' : 'default',
                }}
                onMouseDown={handleReconMouseDown}
                onMouseMove={handleReconMouseMove}
                onMouseUp={handleReconMouseUp}
              >
                {/* Row separator lines — derive from cells */}
                {(() => {
                  const rowYs = new Set<number>()
                  for (const cell of session.cells) {
                    if (cell.col_index === 0 && cell.bbox_pct) {
                      rowYs.add(cell.bbox_pct.y)
                    }
                  }
                  return Array.from(rowYs).map((y, i) => (
                    <div
                      key={`row-${i}`}
                      className="absolute left-0 right-0"
                      style={{
                        top: `${y}%`,
                        height: '1px',
                        backgroundColor: 'rgba(0,0,0,0.06)',
                      }}
                    />
                  ))
                })()}

                {/* Cell texts — black on white, font size derived from cell height */}
                {session.cells.map(cell => {
                  if (!cell.bbox_pct || !cell.text) return null
                  // Container height in px = reconWidth * aspect
                  // Cell height in px = containerHeightPx * (bbox_pct.h / 100)
                  // Font size ≈ 70% of cell height
                  const containerH = reconWidth * aspect
                  const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
                  const fontSize = Math.max(6, cellHeightPx * 0.7)
                  return (
                    <span
                      key={cell.cell_id}
                      className="absolute leading-none overflow-hidden whitespace-nowrap"
                      style={{
                        left: `${cell.bbox_pct.x}%`,
                        top: `${cell.bbox_pct.y}%`,
                        width: `${cell.bbox_pct.w}%`,
                        height: `${cell.bbox_pct.h}%`,
                        color: '#1a1a1a',
                        fontSize: `${fontSize}px`,
                        fontWeight: cell.is_bold ? 'bold' : 'normal',
                        fontFamily: "'Liberation Sans', 'DejaVu Sans', Arial, sans-serif",
                        display: 'flex',
                        alignItems: 'center',
                        padding: '0 1px',
                      }}
                      title={`${cell.cell_id}: ${cell.text}`}
                    >
                      {cell.text}
                    </span>
                  )
                })}

                {/* Generated images at region positions */}
                {imageRegions.map((region, i) => (
                  <div
                    key={`region-${i}`}
                    className="absolute border-2 border-dashed border-indigo-400"
                    style={{
                      left: `${region.bbox_pct.x}%`,
                      top: `${region.bbox_pct.y}%`,
                      width: `${region.bbox_pct.w}%`,
                      height: `${region.bbox_pct.h}%`,
                    }}
                  >
                    {region.image_b64 ? (
                      <img src={region.image_b64} alt={region.description} className="w-full h-full object-cover" />
                    ) : (
                      <div className="w-full h-full flex items-center justify-center bg-indigo-50/50 text-indigo-400 text-[0.5em]">
                        {region.generating ? '...' : `Bild ${i + 1}`}
                      </div>
                    )}
                  </div>
                ))}

                {/* Drawing rectangle */}
                {dragStart && dragEnd && (
                  <div
                    className="absolute border-2 border-dashed border-red-500 bg-red-100/20 pointer-events-none"
                    style={{
                      left: `${Math.min(dragStart.x, dragEnd.x)}%`,
                      top: `${Math.min(dragStart.y, dragEnd.y)}%`,
                      width: `${Math.abs(dragEnd.x - dragStart.x)}%`,
                      height: `${Math.abs(dragEnd.y - dragStart.y)}%`,
                    }}
                  />
                )}
              </div>
            </div>
          </div>
        </div>
      </div>

      {/* Image regions panel */}
      {imageRegions.length > 0 && (
        <div className="border rounded-lg dark:border-gray-700 p-4">
          <h4 className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-3">
            Bildbereiche ({imageRegions.length} gefunden)
          </h4>
          <div className="space-y-3">
            {imageRegions.map((region, i) => (
              <div key={i} className="flex items-start gap-3 p-3 bg-gray-50 dark:bg-gray-800 rounded-lg">
                {/* Preview thumbnail */}
                <div className="w-16 h-16 flex-shrink-0 border rounded dark:border-gray-600 overflow-hidden bg-white">
                  {region.image_b64 ? (
                    <img src={region.image_b64} alt="" className="w-full h-full object-cover" />
                  ) : (
                    <div className="w-full h-full flex items-center justify-center text-gray-400 text-xs">
                      {Math.round(region.bbox_pct.w)}x{Math.round(region.bbox_pct.h)}%
                    </div>
                  )}
                </div>

                {/* Prompt + controls */}
                <div className="flex-1 min-w-0 space-y-2">
                  <div className="flex items-center gap-2">
                    <span className="text-xs text-gray-500 dark:text-gray-400 flex-shrink-0">
                      Bereich {i + 1}:
                    </span>
                    <input
                      type="text"
                      value={region.prompt}
                      onChange={e => {
                        setImageRegions(prev => prev.map((r, j) =>
                          j === i ? { ...r, prompt: e.target.value } : r
                        ))
                      }}
                      placeholder="Beschreibung / Prompt..."
                      className="flex-1 text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
                    />
                  </div>
                  <div className="flex items-center gap-2">
                    <select
                      value={region.style}
                      onChange={e => {
                        setImageRegions(prev => prev.map((r, j) =>
                          j === i ? { ...r, style: e.target.value as ImageStyle } : r
                        ))
                      }}
                      className="text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
                    >
                      {STYLES.map(s => (
                        <option key={s.value} value={s.value}>{s.label}</option>
                      ))}
                    </select>
                    <button
                      onClick={() => handleGenerateImage(i)}
                      disabled={!!region.generating || !region.prompt}
                      className="px-3 py-1 text-sm bg-teal-600 text-white rounded hover:bg-teal-700 disabled:opacity-50"
                    >
                      {region.generating ? 'Generiere...' : 'Generieren'}
                    </button>
                    <button
                      onClick={() => handleRemoveRegion(i)}
                      className="px-2 py-1 text-sm text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded"
                    >
                      Entfernen
                    </button>
                  </div>
                  {region.description && region.description !== region.prompt && (
                    <p className="text-xs text-gray-400">{region.description}</p>
                  )}
                </div>
              </div>
            ))}
          </div>
        </div>
      )}

      {/* Notes and score */}
      <div className="border rounded-lg dark:border-gray-700 p-4 space-y-3">
        <div className="flex items-center gap-4">
          <label className="text-sm font-medium text-gray-700 dark:text-gray-300">
            Bewertung (1-10):
          </label>
          <input
            type="number"
            min={1}
            max={10}
            value={score ?? ''}
            onChange={e => {
              // min/max on the input only constrain the spinner; typed values are
              // clamped here, and unparseable input clears the score.
              const parsed = parseInt(e.target.value, 10)
              setScore(Number.isNaN(parsed) ? null : Math.min(10, Math.max(1, parsed)))
            }}
            className="w-20 text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
          />
          <div className="flex gap-1">
            {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(v => (
              <button
                key={v}
                onClick={() => setScore(v)}
                className={`w-7 h-7 text-xs rounded ${score === v ? 'bg-teal-600 text-white' : 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-200 dark:hover:bg-gray-600'}`}
              >
                {v}
              </button>
            ))}
          </div>
        </div>
        <div>
          <label className="text-sm font-medium text-gray-700 dark:text-gray-300 block mb-1">
            Notizen:
          </label>
          <textarea
            value={notes}
            onChange={e => setNotes(e.target.value)}
            rows={3}
            placeholder="Anmerkungen zur Qualitaet der Rekonstruktion..."
            className="w-full text-sm px-3 py-2 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white"
          />
        </div>
      </div>

      {/* Actions — sticky bottom bar */}
      <div className="sticky bottom-0 bg-white dark:bg-gray-900 border-t dark:border-gray-700 py-3 px-1 -mx-1 flex items-center justify-between">
        <div className="text-sm text-gray-500 dark:text-gray-400">
          {status === 'saved' && <span className="text-green-600 dark:text-green-400">Validierung gespeichert</span>}
          {status === 'saving' && <span>Speichere...</span>}
          {gtMessage && (
            <span className={gtMessage.includes('fehlgeschlagen') ? 'text-red-500' : 'text-amber-600 dark:text-amber-400'}>
              {gtMessage}
            </span>
          )}
        </div>
        <div className="flex items-center gap-3">
          <button
            onClick={handleMarkGroundTruth}
            disabled={gtSaving || status === 'saving'}
            className="px-4 py-2 text-sm bg-amber-600 text-white rounded hover:bg-amber-700 disabled:opacity-50"
          >
            {gtSaving ? 'Speichere...' : isGroundTruth ? 'Ground Truth aktualisieren' : 'Als Ground Truth markieren'}
          </button>
          <button
            onClick={() => { void handleSave() }}
            disabled={status === 'saving'}
            className="px-4 py-2 text-sm bg-gray-600 text-white rounded hover:bg-gray-700 disabled:opacity-50"
          >
            Speichern
          </button>
          <button
            onClick={async () => {
              // Only advance when the save succeeded; on failure the error banner is
              // shown and the user stays on this step.
              if (await handleSave()) onNext()
            }}
            disabled={status === 'saving'}
            className="px-4 py-2 text-sm bg-teal-600 text-white rounded hover:bg-teal-700 disabled:opacity-50"
          >
            Abschliessen
          </button>
        </div>
      </div>
    </div>
  )
}
|
||||
922
admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx
Normal file
922
admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx
Normal file
@@ -0,0 +1,922 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { usePixelWordPositions } from './usePixelWordPositions'
|
||||
|
||||
/** Path prefix prepended to every OCR-pipeline request and image URL built in this file. */
const KLAUSUR_API = '/klausur-api'

/**
 * A single proposed text replacement.
 * NOTE(review): presumably one LLM correction for one field of one vocabulary row —
 * confirm against the code that produces these objects.
 */
interface LlmChange {
  /** Index of the affected row. */
  row_index: number
  /** Which of the three text fields the change applies to. */
  field: 'english' | 'german' | 'example'
  /** Text before the change. */
  old: string
  /** Text after the change. */
  new: string
}
|
||||
|
||||
/** Props accepted by {@link StepLlmReview}. */
interface StepLlmReviewProps {
  /** Pipeline session id used to build API / image URLs; `null` when no session exists. */
  sessionId: string | null
  /** Callback supplied by the parent; presumably advances to the next step — TODO confirm. */
  onNext: () => void
}
|
||||
|
||||
/**
 * Summary metadata for a review run.
 * NOTE(review): field meanings below are inferred from their names — verify against the
 * API that emits this object.
 */
interface ReviewMeta {
  /** Total number of entries. */
  total_entries: number
  /** Number of entries selected for review. */
  to_review: number
  /** Number of entries skipped. */
  skipped: number
  /** Identifier of the model used. */
  model: string
  /** Optional list of indices that were skipped. */
  skipped_indices?: number[]
}
|
||||
|
||||
/** Progress counter pair (`current` out of `total`). */
interface StreamProgress {
  /** Units completed so far. */
  current: number
  /** Units expected overall. */
  total: number
}
|
||||
|
||||
/**
 * Short display label per field name. The keys are the same field names that
 * `COL_TYPE_TO_FIELD` maps column types to.
 */
const FIELD_LABELS: Record<string, string> = {
  english: 'EN',
  german: 'DE',
  example: 'Beispiel',
  source_page: 'Seite',
  marker: 'Marker',
  text: 'Text',
}
|
||||
|
||||
/**
 * Lookup from a column-type key to the name of the WordEntry field that holds that
 * column's text.
 */
const COL_TYPE_TO_FIELD: Record<string, string> = {
  column_en: 'english',
  column_de: 'german',
  column_example: 'example',
  page_ref: 'source_page',
  column_marker: 'marker',
  column_text: 'text',
}
|
||||
|
||||
/**
 * Lookup from a column-type key to the Tailwind text-colour classes (light and dark
 * variants) used when rendering that column.
 */
const COL_TYPE_COLOR: Record<string, string> = {
  column_en: 'text-blue-600 dark:text-blue-400',
  column_de: 'text-green-600 dark:text-green-400',
  column_example: 'text-orange-600 dark:text-orange-400',
  page_ref: 'text-cyan-600 dark:text-cyan-400',
  column_marker: 'text-gray-500 dark:text-gray-400',
  column_text: 'text-gray-700 dark:text-gray-300',
}
|
||||
|
||||
/** The set of states a table row can be in during the review. */
type RowStatus =
  | 'pending'
  | 'active'
  | 'reviewed'
  | 'corrected'
  | 'skipped'
|
||||
|
||||
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
||||
// Core state
|
||||
const [status, setStatus] = useState<'idle' | 'loading' | 'ready' | 'running' | 'done' | 'error' | 'applied'>('idle')
|
||||
const [meta, setMeta] = useState<ReviewMeta | null>(null)
|
||||
const [changes, setChanges] = useState<LlmChange[]>([])
|
||||
const [progress, setProgress] = useState<StreamProgress | null>(null)
|
||||
const [totalDuration, setTotalDuration] = useState(0)
|
||||
const [error, setError] = useState('')
|
||||
const [accepted, setAccepted] = useState<Set<number>>(new Set())
|
||||
const [applying, setApplying] = useState(false)
|
||||
|
||||
// Full vocab table state
|
||||
const [vocabEntries, setVocabEntries] = useState<WordEntry[]>([])
|
||||
const [columnsUsed, setColumnsUsed] = useState<ColumnMeta[]>([])
|
||||
const [activeRowIndices, setActiveRowIndices] = useState<Set<number>>(new Set())
|
||||
const [reviewedRows, setReviewedRows] = useState<Set<number>>(new Set())
|
||||
const [skippedRows, setSkippedRows] = useState<Set<number>>(new Set())
|
||||
const [correctedMap, setCorrectedMap] = useState<Map<number, LlmChange[]>>(new Map())
|
||||
|
||||
// Image
|
||||
const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
|
||||
|
||||
// Overlay view state
|
||||
const [viewMode, setViewMode] = useState<'table' | 'overlay'>('table')
|
||||
const [fontScale, setFontScale] = useState(0.7)
|
||||
const [leftPaddingPct, setLeftPaddingPct] = useState(0)
|
||||
const [globalBold, setGlobalBold] = useState(false)
|
||||
const [cells, setCells] = useState<GridCell[]>([])
|
||||
const reconRef = useRef<HTMLDivElement>(null)
|
||||
const [reconWidth, setReconWidth] = useState(0)
|
||||
|
||||
// Pixel-analysed word positions via shared hook
|
||||
const overlayImageUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
const cellWordPositions = usePixelWordPositions(overlayImageUrl, cells, viewMode === 'overlay')
|
||||
|
||||
const tableRef = useRef<HTMLDivElement>(null)
|
||||
const activeRowRef = useRef<HTMLTableRowElement>(null)
|
||||
|
||||
// Track reconstruction container width for font size calculation
|
||||
useEffect(() => {
|
||||
const el = reconRef.current
|
||||
if (!el) return
|
||||
const obs = new ResizeObserver(entries => {
|
||||
for (const entry of entries) setReconWidth(entry.contentRect.width)
|
||||
})
|
||||
obs.observe(el)
|
||||
return () => obs.disconnect()
|
||||
}, [viewMode])
|
||||
|
||||
// Load session data on mount
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
loadSessionData()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
/**
 * Fetch the current session and initialise the component state from its word result.
 *
 * - Without a `word_result` the step cannot run: an error message is stored and the
 *   status becomes 'error'.
 * - When the word result already carries an `llm_review` with changes, that review is
 *   restored (all rows reviewed, all changes pre-selected) and the status becomes 'done'.
 * - Otherwise the status becomes 'ready' so the user can start a review.
 *
 * Network and HTTP failures are caught and reported through `error` / status 'error'.
 */
const loadSessionData = async () => {
  if (!sessionId) return
  setStatus('loading')
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    const data = await res.json()

    const wordResult: GridResult | undefined = data.word_result
    if (!wordResult) {
      setError('Keine Worterkennungsdaten gefunden. Bitte zuerst Schritt 5 abschliessen.')
      setStatus('error')
      return
    }

    const entries = wordResult.vocab_entries || wordResult.entries || []
    setVocabEntries(entries)
    setColumnsUsed(wordResult.columns_used || [])
    setCells(wordResult.cells || [])

    // Markers left over from an earlier streamed run (e.g. one that failed and is now
    // being retried) must not leak into the freshly loaded state.
    setActiveRowIndices(new Set())
    setSkippedRows(new Set())

    // Check if LLM review was already run
    const llmReview = wordResult.llm_review
    if (!llmReview || !llmReview.changes) {
      setStatus('ready')
      return
    }

    const existingChanges: LlmChange[] = llmReview.changes as LlmChange[]
    setChanges(existingChanges)
    setTotalDuration(llmReview.duration_ms || 0)

    // A stored review covers every entry, so mark all rows as reviewed.
    setReviewedRows(new Set(entries.map((_: WordEntry, i: number) => i)))

    // Group the stored changes by the row they belong to.
    const cMap = new Map<number, LlmChange[]>()
    for (const c of existingChanges) {
      const existing = cMap.get(c.row_index) || []
      existing.push(c)
      cMap.set(c.row_index, existing)
    }
    setCorrectedMap(cMap)

    // Default: all accepted
    setAccepted(new Set(existingChanges.map((_: LlmChange, i: number) => i)))

    // The stored review does not expose per-run counts here, so the totals are derived
    // from the entry list and no rows are reported as skipped.
    setMeta({
      total_entries: entries.length,
      to_review: entries.length,
      skipped: 0,
      model: llmReview.model_used || 'unknown',
    })
    setStatus('done')
  } catch (e: unknown) {
    setError(e instanceof Error ? e.message : String(e))
    setStatus('error')
  }
}
|
||||
|
||||
/**
 * Start a streamed LLM review for the current session and mirror its progress into state.
 *
 * POSTs to the `llm-review?stream=true` endpoint and reads the response body as chunks of
 * the form `data: <json>` separated by a blank line. Handled event types:
 *  - `meta`:     totals, model name and the row indices the backend skipped
 *  - `batch`:    rows that were just reviewed plus the changes proposed for them
 *  - `complete`: end of the run; every change is pre-selected and every non-skipped row
 *                counts as reviewed
 *  - `error`:    aborts the run with the reported detail
 * Chunks that do not start with `data: ` or do not parse as JSON are ignored.
 *
 * If the stream ends without a `complete` event the run is still closed (status 'done')
 * with whatever changes were collected, so the UI never stays stuck in 'running'.
 */
const runReview = useCallback(async () => {
  if (!sessionId) return
  setStatus('running')
  setError('')
  setChanges([])
  setProgress(null)
  setMeta(null)
  setTotalDuration(0)
  // Selections from a previous run refer to indices of a change list that no longer exists.
  setAccepted(new Set())
  setActiveRowIndices(new Set())
  setReviewedRows(new Set())
  setSkippedRows(new Set())
  setCorrectedMap(new Map())

  try {
    const res = await fetch(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review?stream=true`,
      { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({}) },
    )

    if (!res.ok) {
      const data = await res.json().catch(() => ({}))
      throw new Error(data.detail || `HTTP ${res.status}`)
    }
    // Response.body may be null; fail with a readable message instead of a TypeError.
    if (!res.body) {
      throw new Error('Keine Antwortdaten vom Server erhalten')
    }

    const reader = res.body.getReader()
    const decoder = new TextDecoder()
    let buffer = ''
    let allChanges: LlmChange[] = []
    const allReviewed = new Set<number>()
    let allSkipped = new Set<number>()
    const cMap = new Map<number, LlmChange[]>()
    let completed = false

    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        buffer += decoder.decode(value, { stream: true })

        // Consume every complete (blank-line terminated) chunk currently buffered.
        while (buffer.includes('\n\n')) {
          const idx = buffer.indexOf('\n\n')
          const chunk = buffer.slice(0, idx).trim()
          buffer = buffer.slice(idx + 2)

          if (!chunk.startsWith('data: ')) continue
          const dataStr = chunk.slice(6)

          let event: any
          try { event = JSON.parse(dataStr) } catch { continue }

          if (event.type === 'meta') {
            setMeta({
              total_entries: event.total_entries,
              to_review: event.to_review,
              skipped: event.skipped,
              model: event.model,
              skipped_indices: event.skipped_indices,
            })
            // Mark skipped rows
            if (event.skipped_indices) {
              allSkipped = new Set(event.skipped_indices)
              setSkippedRows(allSkipped)
            }
          }

          if (event.type === 'batch') {
            const batchChanges: LlmChange[] = event.changes || []
            const batchRows: number[] = event.entries_reviewed || []

            // Highlight the rows of the batch that was just processed.
            setActiveRowIndices(new Set(batchRows))

            // Accumulate changes
            allChanges = [...allChanges, ...batchChanges]
            setChanges(allChanges)
            setProgress(event.progress)

            // Group changes per row; fresh arrays/maps so React sees new references.
            for (const c of batchChanges) {
              cMap.set(c.row_index, [...(cMap.get(c.row_index) || []), c])
            }
            setCorrectedMap(new Map(cMap))

            // Mark batch rows as reviewed
            for (const r of batchRows) {
              allReviewed.add(r)
            }
            setReviewedRows(new Set(allReviewed))

            // Scroll to the active row once React has rendered it.
            setTimeout(() => {
              activeRowRef.current?.scrollIntoView({ behavior: 'smooth', block: 'center' })
            }, 50)
          }

          if (event.type === 'complete') {
            completed = true
            setActiveRowIndices(new Set())
            setTotalDuration(event.duration_ms || 0)
            setAccepted(new Set(allChanges.map((_: LlmChange, i: number) => i)))
            // Mark all non-skipped as reviewed
            vocabEntries.forEach((_: WordEntry, i: number) => {
              if (!allSkipped.has(i)) allReviewed.add(i)
            })
            setReviewedRows(new Set(allReviewed))
            setStatus('done')
          }

          if (event.type === 'error') {
            throw new Error(event.detail || 'Unbekannter Fehler')
          }
        }
      }
    } finally {
      // Release the connection whether the stream finished, failed, or was aborted by an
      // `error` event; cancelling an already finished stream is harmless.
      reader.cancel().catch(() => {})
    }

    // Stream ended without a `complete` event: close the run with what was collected.
    // Rows that were never part of a batch intentionally stay 'pending'.
    if (!completed) {
      setActiveRowIndices(new Set())
      setAccepted(new Set(allChanges.map((_: LlmChange, i: number) => i)))
      setStatus('done')
    }
  } catch (e: unknown) {
    const msg = e instanceof Error ? e.message : String(e)
    setError(msg)
    setStatus('error')
  }
}, [sessionId, vocabEntries])
|
||||
|
||||
/** Flip the selection of one proposed change, identified by its position in `changes`. */
const toggleChange = (index: number) => {
  setAccepted(current => {
    const updated = new Set(current)
    // Set.delete returns false when the index was not selected; select it in that case.
    if (!updated.delete(index)) {
      updated.add(index)
    }
    return updated
  })
}
|
||||
|
||||
/** Select every proposed change, or clear the selection when all are already selected. */
const toggleAll = () => {
  const everythingSelected = accepted.size === changes.length
  const nextSelection = everythingSelected
    ? new Set<number>()
    : new Set<number>(changes.map((_: LlmChange, position: number) => position))
  setAccepted(nextSelection)
}
|
||||
|
||||
/**
 * Send the indices of the selected changes to the `llm-review/apply` endpoint.
 *
 * On success the status becomes 'applied'. On failure the message is stored and the
 * status becomes 'error': the 'done' view never renders `error`, so without the status
 * switch a failed apply would go unnoticed by the user.
 * `applying` is true for the duration of the request.
 */
const applyChanges = useCallback(async () => {
  if (!sessionId) return
  setApplying(true)
  // Drop the message of an earlier failed attempt before trying again.
  setError('')
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review/apply`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ accepted_indices: Array.from(accepted) }),
    })
    if (!res.ok) {
      // Prefer the backend's `detail` text; fall back to the bare HTTP status.
      const data = await res.json().catch(() => ({}))
      throw new Error(data.detail || `HTTP ${res.status}`)
    }
    setStatus('applied')
  } catch (e: unknown) {
    setError(e instanceof Error ? e.message : String(e))
    setStatus('error')
  } finally {
    setApplying(false)
  }
}, [sessionId, accepted])
|
||||
|
||||
/** Resolve the status of a row; when several apply, the first entry in the list below wins. */
const getRowStatus = (rowIndex: number): RowStatus => {
  const byPrecedence: Array<[{ has(key: number): boolean }, RowStatus]> = [
    [activeRowIndices, 'active'],
    [skippedRows, 'skipped'],
    [correctedMap, 'corrected'],
    [reviewedRows, 'reviewed'],
  ]
  for (const [members, label] of byPrecedence) {
    if (members.has(rowIndex)) return label
  }
  return 'pending'
}
|
||||
|
||||
const dewarpedUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
|
||||
// Give every column one shared x offset and width so its cells line up vertically.
// Per col_index the value taken is the element at position floor(n / 2) of the sorted
// samples (the median; for an even count this is the upper of the two middle values).
const colPositions = useMemo(() => {
  const ascending = (a: number, b: number) => a - b
  const middleOf = (values: number[]) => [...values].sort(ascending)[Math.floor(values.length / 2)]

  // Collect the x / w samples of all cells that carry a bounding box, grouped by column.
  const samples = new Map<number, { xs: number[]; ws: number[] }>()
  for (const cell of cells) {
    const box = cell.bbox_pct
    if (!box) continue
    let bucket = samples.get(cell.col_index)
    if (!bucket) {
      bucket = { xs: [], ws: [] }
      samples.set(cell.col_index, bucket)
    }
    bucket.xs.push(box.x)
    bucket.ws.push(box.w)
  }

  const snapped = new Map<number, { x: number; w: number }>()
  samples.forEach((bucket, colIdx) => {
    snapped.set(colIdx, { x: middleOf(bucket.xs), w: middleOf(bucket.ws) })
  })
  return snapped
}, [cells])
|
||||
|
||||
if (!sessionId) {
|
||||
return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
|
||||
}
|
||||
|
||||
// --- Loading session data ---
|
||||
if (status === 'loading' || status === 'idle') {
|
||||
return (
|
||||
<div className="flex items-center gap-3 justify-center py-12">
|
||||
<div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
|
||||
<span className="text-gray-500">Session-Daten werden geladen...</span>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Error ---
|
||||
if (status === 'error') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">⚠️</div>
|
||||
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler bei OCR-Zeichenkorrektur</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
|
||||
<div className="flex gap-3">
|
||||
<button onClick={() => { setError(''); loadSessionData() }}
|
||||
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
|
||||
Erneut versuchen
|
||||
</button>
|
||||
<button onClick={onNext}
|
||||
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
|
||||
Ueberspringen →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Applied ---
|
||||
if (status === 'applied') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">✅</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturen uebernommen</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
|
||||
{accepted.size} von {changes.length} Korrekturen wurden angewendet.
|
||||
</p>
|
||||
<button onClick={onNext}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||
Weiter →
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// Active entry for highlighting on image
|
||||
const activeEntry = vocabEntries.find((_: WordEntry, i: number) => activeRowIndices.has(i))
|
||||
|
||||
// Whole-number completion percentage; 0 while no progress is known or when total is 0
// (dividing by a zero total would otherwise render "NaN%" and an invalid bar width).
const pct = progress && progress.total > 0 ? Math.round((progress.current / progress.total) * 100) : 0
|
||||
|
||||
/**
 * Store an inline edit made to a grid cell.
 * A `null` text means "no edit" and is ignored. Otherwise the cell's text is replaced and,
 * when the cell's column type maps to a WordEntry field, the entry at `rowIndex` gets the
 * same text in that field.
 */
const handleCellEdit = (cellId: string, rowIndex: number, newText: string | null) => {
  if (newText === null) return

  setCells(current =>
    current.map(candidate => (candidate.cell_id === cellId ? { ...candidate, text: newText } : candidate)),
  )

  // Mirror the edit into the vocabulary entry if the cell belongs to a known field.
  const edited = cells.find(candidate => candidate.cell_id === cellId)
  if (!edited) return
  const field = COL_TYPE_TO_FIELD[edited.col_type]
  if (!field) return

  setVocabEntries(current =>
    current.map((entry, position) => (position === rowIndex ? { ...entry, [field]: newText } : entry)),
  )
}
|
||||
|
||||
// --- Ready / Running / Done: 2-column layout ---
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h3 className="text-base font-medium text-gray-700 dark:text-gray-300">
|
||||
Schritt 6: Korrektur
|
||||
</h3>
|
||||
<p className="text-xs text-gray-400 mt-0.5">
|
||||
{status === 'ready' && `${vocabEntries.length} Eintraege bereit zur Pruefung`}
|
||||
{status === 'running' && meta && `${meta.model} · ${meta.to_review} zu pruefen, ${meta.skipped} uebersprungen`}
|
||||
{status === 'done' && (
|
||||
<>
|
||||
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden
|
||||
{meta && <> · {meta.skipped} uebersprungen</>}
|
||||
{' '}· {totalDuration}ms · {meta?.model}
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{status === 'ready' && (
|
||||
<button onClick={runReview}
|
||||
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium">
|
||||
Korrektur starten
|
||||
</button>
|
||||
)}
|
||||
{status === 'running' && (
|
||||
<div className="flex items-center gap-2 text-sm text-teal-600 dark:text-teal-400">
|
||||
<div className="animate-spin rounded-full h-4 w-4 border-b-2 border-teal-500" />
|
||||
{progress ? `${progress.current}/${progress.total}` : 'Startet...'}
|
||||
</div>
|
||||
)}
|
||||
{status === 'done' && changes.length > 0 && (
|
||||
<button onClick={toggleAll}
|
||||
className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
|
||||
{accepted.size === changes.length ? 'Keine' : 'Alle'} auswaehlen
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Progress bar (while running) */}
|
||||
{status === 'running' && progress && (
|
||||
<div className="space-y-1">
|
||||
<div className="flex justify-between text-xs text-gray-400">
|
||||
<span>{progress.current} / {progress.total} Eintraege geprueft</span>
|
||||
<span>{pct}%</span>
|
||||
</div>
|
||||
<div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-2">
|
||||
<div className="bg-teal-500 h-2 rounded-full transition-all duration-500" style={{ width: `${pct}%` }} />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* View mode toggle */}
|
||||
<div className="flex items-center gap-1">
|
||||
<button
|
||||
onClick={() => setViewMode('table')}
|
||||
className={`px-3 py-1.5 text-xs rounded-l-lg border transition-colors ${
|
||||
viewMode === 'table'
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
|
||||
}`}
|
||||
>
|
||||
Tabelle
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setViewMode('overlay')}
|
||||
className={`px-3 py-1.5 text-xs rounded-r-lg border transition-colors ${
|
||||
viewMode === 'overlay'
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
|
||||
}`}
|
||||
>
|
||||
Overlay
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Overlay toolbar */}
|
||||
{viewMode === 'overlay' && (
|
||||
<div className="flex items-center gap-4 flex-wrap bg-gray-50 dark:bg-gray-800/50 rounded-lg px-3 py-2">
|
||||
<label className="flex items-center gap-2 text-xs text-gray-600 dark:text-gray-400">
|
||||
Schrift
|
||||
<input
|
||||
type="range" min={30} max={120} value={Math.round(fontScale * 100)}
|
||||
onChange={e => setFontScale(Number(e.target.value) / 100)}
|
||||
className="w-24 h-1 accent-teal-600"
|
||||
/>
|
||||
<span className="w-8 text-right font-mono">{Math.round(fontScale * 100)}%</span>
|
||||
</label>
|
||||
<label className="flex items-center gap-2 text-xs text-gray-600 dark:text-gray-400">
|
||||
Einrueckung
|
||||
<input
|
||||
type="range" min={0} max={20} step={0.5} value={leftPaddingPct}
|
||||
onChange={e => setLeftPaddingPct(Number(e.target.value))}
|
||||
className="w-24 h-1 accent-teal-600"
|
||||
/>
|
||||
<span className="w-8 text-right font-mono">{leftPaddingPct}%</span>
|
||||
</label>
|
||||
<button
|
||||
onClick={() => setGlobalBold(b => !b)}
|
||||
className={`px-2 py-1 text-xs rounded border transition-colors font-bold ${
|
||||
globalBold
|
||||
? 'bg-teal-600 text-white border-teal-600'
|
||||
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||
}`}
|
||||
>
|
||||
B
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* 2-column layout: Image + Table/Overlay */}
|
||||
<div className={`grid gap-4 ${viewMode === 'overlay' ? 'grid-cols-2' : 'grid-cols-3'}`}>
|
||||
{/* Left: Dewarped Image with highlight overlay */}
|
||||
<div className="col-span-1">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Originalbild
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative sticky top-4">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Dewarped"
|
||||
className="w-full h-auto"
|
||||
onLoad={(e) => {
|
||||
const img = e.target as HTMLImageElement
|
||||
setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight })
|
||||
}}
|
||||
/>
|
||||
{/* Highlight overlay for active row */}
|
||||
{activeEntry?.bbox && (
|
||||
<div
|
||||
className="absolute border-2 border-yellow-400 bg-yellow-400/20 pointer-events-none animate-pulse"
|
||||
style={{
|
||||
left: `${activeEntry.bbox.x}%`,
|
||||
top: `${activeEntry.bbox.y}%`,
|
||||
width: `${activeEntry.bbox.w}%`,
|
||||
height: `${activeEntry.bbox.h}%`,
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right: Table or Overlay */}
|
||||
<div className={viewMode === 'table' ? 'col-span-2' : 'col-span-1'} ref={tableRef}>
|
||||
{viewMode === 'table' ? (
|
||||
<>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
{columnsUsed.length === 1 && columnsUsed[0]?.type === 'column_text' ? 'Tabelle' : 'Vokabeltabelle'} ({vocabEntries.length} Eintraege)
|
||||
</div>
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
|
||||
<div className="max-h-[70vh] overflow-y-auto">
|
||||
<table className="w-full text-sm">
|
||||
<thead className="sticky top-0 z-10">
|
||||
<tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
|
||||
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium w-10">#</th>
|
||||
{columnsUsed.length > 0 ? (
|
||||
columnsUsed.map((col, i) => {
|
||||
const field = COL_TYPE_TO_FIELD[col.type]
|
||||
if (!field) return null
|
||||
return (
|
||||
<th key={i} className={`px-2 py-2 text-left font-medium ${COL_TYPE_COLOR[col.type] || 'text-gray-500 dark:text-gray-400'}`}>
|
||||
{FIELD_LABELS[field] || field}
|
||||
</th>
|
||||
)
|
||||
})
|
||||
) : (
|
||||
<>
|
||||
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">EN</th>
|
||||
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">DE</th>
|
||||
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Beispiel</th>
|
||||
</>
|
||||
)}
|
||||
<th className="px-2 py-2 text-center text-gray-500 dark:text-gray-400 font-medium w-16">Status</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{vocabEntries.map((entry, idx) => {
|
||||
const rowStatus = getRowStatus(idx)
|
||||
const rowChanges = correctedMap.get(idx)
|
||||
|
||||
const rowBg = {
|
||||
pending: '',
|
||||
active: 'bg-yellow-50 dark:bg-yellow-900/20',
|
||||
reviewed: '',
|
||||
corrected: 'bg-teal-50/50 dark:bg-teal-900/10',
|
||||
skipped: 'bg-gray-50 dark:bg-gray-800/50',
|
||||
}[rowStatus]
|
||||
|
||||
return (
|
||||
<tr
|
||||
key={idx}
|
||||
ref={rowStatus === 'active' ? activeRowRef : undefined}
|
||||
className={`border-b border-gray-100 dark:border-gray-700/50 ${rowBg} ${
|
||||
rowStatus === 'active' ? 'ring-1 ring-yellow-400 ring-inset' : ''
|
||||
}`}
|
||||
>
|
||||
<td className="px-2 py-1.5 text-gray-400 font-mono text-xs">{idx}</td>
|
||||
{columnsUsed.length > 0 ? (
|
||||
columnsUsed.map((col, i) => {
|
||||
const field = COL_TYPE_TO_FIELD[col.type]
|
||||
if (!field) return null
|
||||
const text = (entry as Record<string, unknown>)[field] as string || ''
|
||||
return (
|
||||
<td key={i} className="px-2 py-1.5 text-xs">
|
||||
<CellContent text={text} field={field} rowChanges={rowChanges} />
|
||||
</td>
|
||||
)
|
||||
})
|
||||
) : (
|
||||
<>
|
||||
<td className="px-2 py-1.5">
|
||||
<CellContent text={entry.english} field="english" rowChanges={rowChanges} />
|
||||
</td>
|
||||
<td className="px-2 py-1.5">
|
||||
<CellContent text={entry.german} field="german" rowChanges={rowChanges} />
|
||||
</td>
|
||||
<td className="px-2 py-1.5 text-xs">
|
||||
<CellContent text={entry.example} field="example" rowChanges={rowChanges} />
|
||||
</td>
|
||||
</>
|
||||
)}
|
||||
<td className="px-2 py-1.5 text-center">
|
||||
<StatusIcon status={rowStatus} />
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Text-Rekonstruktion ({cells.filter(c => c.text).length} Zellen)
|
||||
</div>
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-white dark:bg-white">
|
||||
<div
|
||||
ref={reconRef}
|
||||
className="relative"
|
||||
style={{
|
||||
aspectRatio: imageNaturalSize ? `${imageNaturalSize.w} / ${imageNaturalSize.h}` : '3 / 4',
|
||||
}}
|
||||
>
|
||||
{cells.map(cell => {
|
||||
if (!cell.bbox_pct || !cell.text) return null
|
||||
const col = colPositions.get(cell.col_index)
|
||||
const cellX = col?.x ?? cell.bbox_pct.x
|
||||
const cellW = col?.w ?? cell.bbox_pct.w
|
||||
const aspect = imageNaturalSize ? imageNaturalSize.h / imageNaturalSize.w : 4 / 3
|
||||
const containerH = reconWidth * aspect
|
||||
const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
|
||||
|
||||
const wordPos = cellWordPositions.get(cell.cell_id)
|
||||
|
||||
// Pixel-analysed: render word-groups at detected positions
|
||||
if (wordPos) {
|
||||
return wordPos.map((wp, i) => {
|
||||
// Auto font-size from pixel analysis, scaled by user slider
|
||||
const autoFontPx = cellHeightPx * wp.fontRatio * fontScale
|
||||
const fs = Math.max(6, autoFontPx)
|
||||
return (
|
||||
<span
|
||||
key={`${cell.cell_id}_${i}`}
|
||||
className="absolute leading-none pointer-events-none select-none"
|
||||
style={{
|
||||
left: `${wp.xPct}%`,
|
||||
top: `${cell.bbox_pct.y}%`,
|
||||
width: `${wp.wPct}%`,
|
||||
height: `${cell.bbox_pct.h}%`,
|
||||
fontSize: `${fs}px`,
|
||||
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
whiteSpace: 'nowrap',
|
||||
overflow: 'visible',
|
||||
color: '#1a1a1a',
|
||||
}}
|
||||
>
|
||||
{wp.text}
|
||||
</span>
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// Fallback: no pixel data — single span for entire cell
|
||||
const fontSize = Math.max(6, cellHeightPx * fontScale)
|
||||
return (
|
||||
<span
|
||||
key={cell.cell_id}
|
||||
className="absolute leading-none pointer-events-none select-none"
|
||||
style={{
|
||||
left: `${cellX}%`,
|
||||
top: `${cell.bbox_pct.y}%`,
|
||||
width: `${cellW}%`,
|
||||
height: `${cell.bbox_pct.h}%`,
|
||||
fontSize: `${fontSize}px`,
|
||||
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
|
||||
paddingLeft: `${leftPaddingPct}%`,
|
||||
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
whiteSpace: 'pre',
|
||||
overflow: 'visible',
|
||||
color: '#1a1a1a',
|
||||
}}
|
||||
>
|
||||
{cell.text}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Done state: summary + actions */}
|
||||
{status === 'done' && (
|
||||
<div className="space-y-4">
|
||||
{/* Summary */}
|
||||
<div className="bg-gray-50 dark:bg-gray-800/50 rounded-lg p-3 text-xs text-gray-500 dark:text-gray-400">
|
||||
{changes.length === 0 ? (
|
||||
<span>Keine Korrekturen noetig — alle Eintraege sind korrekt.</span>
|
||||
) : (
|
||||
<span>
|
||||
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden ·{' '}
|
||||
{accepted.size} ausgewaehlt ·{' '}
|
||||
{meta?.skipped || 0} uebersprungen (Lautschrift) ·{' '}
|
||||
{totalDuration}ms
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Corrections detail list (if any) */}
|
||||
{changes.length > 0 && (
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
|
||||
<div className="bg-gray-50 dark:bg-gray-800 px-3 py-2 border-b border-gray-200 dark:border-gray-700">
|
||||
<span className="text-xs font-medium text-gray-600 dark:text-gray-400">
|
||||
Korrekturvorschlaege ({accepted.size}/{changes.length} ausgewaehlt)
|
||||
</span>
|
||||
</div>
|
||||
<table className="w-full text-sm">
|
||||
<thead>
|
||||
<tr className="bg-gray-50/50 dark:bg-gray-800/50 border-b border-gray-200 dark:border-gray-700">
|
||||
<th className="w-10 px-3 py-1.5 text-center">
|
||||
<input type="checkbox" checked={accepted.size === changes.length} onChange={toggleAll}
|
||||
className="rounded border-gray-300 dark:border-gray-600" />
|
||||
</th>
|
||||
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Zeile</th>
|
||||
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Feld</th>
|
||||
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Vorher</th>
|
||||
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Nachher</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{changes.map((change, idx) => (
|
||||
<tr key={idx} className={`border-b border-gray-100 dark:border-gray-700/50 ${
|
||||
accepted.has(idx) ? 'bg-teal-50/50 dark:bg-teal-900/10' : ''
|
||||
}`}>
|
||||
<td className="px-3 py-1.5 text-center">
|
||||
<input type="checkbox" checked={accepted.has(idx)} onChange={() => toggleChange(idx)}
|
||||
className="rounded border-gray-300 dark:border-gray-600" />
|
||||
</td>
|
||||
<td className="px-2 py-1.5 text-gray-500 dark:text-gray-400 font-mono text-xs">R{change.row_index}</td>
|
||||
<td className="px-2 py-1.5">
|
||||
<span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
|
||||
{FIELD_LABELS[change.field] || change.field}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-2 py-1.5"><span className="line-through text-red-500 dark:text-red-400 text-xs">{change.old}</span></td>
|
||||
<td className="px-2 py-1.5"><span className="text-green-600 dark:text-green-400 font-medium text-xs">{change.new}</span></td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Actions */}
|
||||
<div className="flex items-center justify-between pt-2">
|
||||
<p className="text-xs text-gray-400">
|
||||
{changes.length > 0 ? `${accepted.size} von ${changes.length} ausgewaehlt` : ''}
|
||||
</p>
|
||||
<div className="flex gap-3">
|
||||
{changes.length > 0 && (
|
||||
<button onClick={onNext}
|
||||
className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
|
||||
Alle ablehnen
|
||||
</button>
|
||||
)}
|
||||
{changes.length > 0 ? (
|
||||
<button onClick={applyChanges} disabled={applying || accepted.size === 0}
|
||||
className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium">
|
||||
{applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
|
||||
</button>
|
||||
) : (
|
||||
<button onClick={onNext}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||
Weiter →
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Content of one table cell.
 * Shows the first change recorded for `field` as "old (struck through) → new"; without a
 * change it shows the plain text, or a dash placeholder when the text is empty.
 */
function CellContent({ text, field, rowChanges }: {
  text: string
  field: string
  rowChanges?: LlmChange[]
}) {
  const change = (rowChanges ?? []).find(candidate => candidate.field === field)

  if (change) {
    return (
      <span>
        <span className="line-through text-red-400 dark:text-red-500 text-xs mr-1">{change.old}</span>
        <span className="text-green-600 dark:text-green-400 font-medium text-xs">{change.new}</span>
      </span>
    )
  }

  if (!text) {
    return <span className="text-gray-300 dark:text-gray-600">—</span>
  }

  return <span className="text-gray-700 dark:text-gray-300 text-xs">{text}</span>
}
|
||||
|
||||
/** Small indicator rendered in a row's status column, chosen by the row's review status. */
function StatusIcon({ status }: { status: RowStatus }) {
  // One entry per RowStatus value; the element for the given status is looked up below.
  const icons = {
    pending: <span className="text-gray-300 dark:text-gray-600 text-xs">—</span>,
    active: (
      <span className="inline-block w-3 h-3 rounded-full bg-yellow-400 animate-pulse" title="Wird geprueft" />
    ),
    reviewed: (
      <svg className="w-4 h-4 text-green-500 inline-block" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
        <path strokeLinecap="round" strokeLinejoin="round" d="M5 13l4 4L19 7" />
      </svg>
    ),
    corrected: (
      <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-teal-100 dark:bg-teal-900/30 text-teal-700 dark:text-teal-400">
        korr.
      </span>
    ),
    skipped: (
      <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400">
        skip
      </span>
    ),
  }
  return icons[status]
}
|
||||
247
admin-lehrer/components/ocr-pipeline/StepOrientation.tsx
Normal file
247
admin-lehrer/components/ocr-pipeline/StepOrientation.tsx
Normal file
@@ -0,0 +1,247 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props for the upload / orientation step. */
interface StepOrientationProps {
  /** ID of an existing session to reload (used when navigating back to this step). */
  sessionId?: string | null
  /** Called with the active session ID when the user clicks "Weiter". */
  onNext: (sessionId: string) => void
}

/**
 * Upload and orientation step.
 *
 * Without a session it renders a name field plus a drag-and-drop / file-picker
 * upload area. Uploading creates a session via POST and then immediately
 * triggers orientation detection for it. With a session it shows the original
 * next to the oriented image, the detection result and the "Weiter" button.
 */
export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOrientationProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [orientationResult, setOrientationResult] = useState<OrientationResult | null>(null)
  const [uploading, setUploading] = useState(false)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [dragOver, setDragOver] = useState(false)
  const [sessionName, setSessionName] = useState('')

  // Reload session data when navigating back
  useEffect(() => {
    if (!existingSessionId || session) return

    // Flipped by the cleanup below so that a response arriving after unmount,
    // after the session ID changed, or after an upload already produced a
    // session is discarded instead of overwriting newer state.
    let cancelled = false

    const loadSession = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}`)
        if (!res.ok) return
        const data = await res.json()
        if (cancelled) return

        const sessionInfo: SessionInfo = {
          session_id: data.session_id,
          filename: data.filename,
          image_width: data.image_width,
          image_height: data.image_height,
          original_image_url: `${KLAUSUR_API}${data.original_image_url}`,
        }
        setSession(sessionInfo)

        if (data.orientation_result) {
          setOrientationResult(data.orientation_result)
        }
      } catch (e) {
        console.error('Failed to reload session:', e)
      }
    }

    loadSession()

    return () => {
      cancelled = true
    }
  }, [existingSessionId, session])

  /** Uploads `file` as a new session, then runs orientation detection on it. */
  const handleUpload = useCallback(async (file: File) => {
    setUploading(true)
    setError(null)
    setOrientationResult(null)

    try {
      const formData = new FormData()
      formData.append('file', file)
      if (sessionName.trim()) {
        formData.append('name', sessionName.trim())
      }

      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
        method: 'POST',
        body: formData,
      })

      if (!res.ok) {
        // Fall back to the HTTP status text when the error body is not JSON.
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Upload fehlgeschlagen')
      }

      const data: SessionInfo = await res.json()
      // The API returns a path; prefix it so it resolves through the proxy base.
      data.original_image_url = `${KLAUSUR_API}${data.original_image_url}`
      setSession(data)

      // Auto-trigger orientation detection
      setDetecting(true)
      const orientRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/orientation`, {
        method: 'POST',
      })

      if (!orientRes.ok) {
        throw new Error('Orientierungserkennung fehlgeschlagen')
      }

      const orientData = await orientRes.json()
      setOrientationResult({
        orientation_degrees: orientData.orientation_degrees,
        corrected: orientData.corrected,
        duration_seconds: orientData.duration_seconds,
      })
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setUploading(false)
      setDetecting(false)
    }
  }, [sessionName])

  const handleDrop = useCallback((e: React.DragEvent) => {
    // Always prevent the default so the browser does not navigate to the file.
    e.preventDefault()
    setDragOver(false)
    // The drop zone stays mounted during an upload; ignore further drops so a
    // second concurrent upload cannot create a duplicate session.
    if (uploading) return
    const file = e.dataTransfer.files[0]
    if (file) handleUpload(file)
  }, [handleUpload, uploading])

  const handleFileInput = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0]
    if (file) handleUpload(file)
  }, [handleUpload])

  // Upload area (no session yet)
  if (!session) {
    return (
      <div className="space-y-4">
        {/* Session name input */}
        <div>
          <label className="block text-sm font-medium text-gray-600 dark:text-gray-400 mb-1">
            Session-Name (optional)
          </label>
          <input
            type="text"
            value={sessionName}
            onChange={(e) => setSessionName(e.target.value)}
            placeholder="z.B. Unit 3 Seite 42"
            className="w-full max-w-sm px-3 py-2 text-sm border rounded-lg dark:bg-gray-800 dark:border-gray-600 dark:text-gray-200 focus:outline-none focus:ring-2 focus:ring-teal-500"
          />
        </div>

        <div
          onDragOver={(e) => { e.preventDefault(); setDragOver(true) }}
          onDragLeave={() => setDragOver(false)}
          onDrop={handleDrop}
          className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
            dragOver
              ? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
              : 'border-gray-300 dark:border-gray-600 hover:border-teal-400'
          }`}
        >
          {uploading ? (
            <div className="text-gray-500">
              <div className="animate-spin inline-block w-8 h-8 border-2 border-teal-500 border-t-transparent rounded-full mb-3" />
              <p>Wird hochgeladen...</p>
            </div>
          ) : (
            <>
              <div className="text-4xl mb-3">📄</div>
              <p className="text-gray-600 dark:text-gray-400 mb-2">
                PDF oder Bild hierher ziehen
              </p>
              <p className="text-sm text-gray-400 mb-4">oder</p>
              <label className="inline-block px-4 py-2 bg-teal-600 text-white rounded-lg cursor-pointer hover:bg-teal-700 transition-colors">
                Datei auswaehlen
                <input
                  type="file"
                  accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
                  onChange={handleFileInput}
                  className="hidden"
                />
              </label>
            </>
          )}
        </div>

        {error && (
          <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
            {error}
          </div>
        )}
      </div>
    )
  }

  // Session active: show orientation result
  const orientedUrl = orientationResult
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/image/oriented`
    : null

  return (
    <div className="space-y-4">
      {/* Filename */}
      <div className="text-sm text-gray-500 dark:text-gray-400">
        Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
        {' '}({session.image_width} x {session.image_height} px)
      </div>

      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Orientierung wird erkannt...
        </div>
      )}

      {/* Image comparison */}
      <ImageCompareView
        originalUrl={session.original_image_url}
        deskewedUrl={orientedUrl}
        showGrid={false}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Original"
        rightLabel="Orientiert"
      />

      {/* Orientation result badge */}
      {orientationResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex items-center gap-3 text-sm">
            {orientationResult.corrected ? (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
                🔄 {orientationResult.orientation_degrees}° korrigiert
              </span>
            ) : (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-400 text-xs font-medium">
                ✓ 0° (keine Drehung noetig)
              </span>
            )}
            <span className="text-gray-400 text-xs">
              {orientationResult.duration_seconds}s
            </span>
          </div>
        </div>
      )}

      {/* Next button */}
      {orientationResult && (
        <div className="flex justify-end">
          <button
            onClick={() => onNext(session.session_id)}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter →
          </button>
        </div>
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
1101
admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx
Normal file
1101
admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx
Normal file
File diff suppressed because it is too large
Load Diff
263
admin-lehrer/components/ocr-pipeline/StepRowDetection.tsx
Normal file
263
admin-lehrer/components/ocr-pipeline/StepRowDetection.tsx
Normal file
@@ -0,0 +1,263 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { RowResult, RowGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props for the row-detection step. */
interface StepRowDetectionProps {
  /** Session to run row detection for; `null` renders a placeholder instead. */
  sessionId: string | null
  /** Called when the user clicks "Weiter". */
  onNext: () => void
}

/**
 * Row-detection step.
 *
 * On mount (and whenever `sessionId` changes) it loads the session and reuses
 * a stored `row_result` if present; otherwise it runs detection via POST.
 * It shows the row overlay next to the cropped image, a per-row summary, and
 * controls to re-run detection, record ground truth and continue.
 */
export function StepRowDetection({ sessionId, onNext }: StepRowDetectionProps) {
  const [rowResult, setRowResult] = useState<RowResult | null>(null)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)
  // Cache-buster for the overlay image. It changes only when a new row result
  // arrives, so unrelated re-renders (e.g. typing in the notes field) do not
  // make the browser download the overlay again.
  const [overlayTs, setOverlayTs] = useState(0)

  /** Runs row detection on the backend and stores the result. */
  const runAutoDetection = useCallback(async () => {
    if (!sessionId) return
    setDetecting(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/rows`, {
        method: 'POST',
      })
      if (!res.ok) {
        // Fall back to the HTTP status text when the error body is not JSON.
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Zeilenerkennung fehlgeschlagen')
      }
      const data: RowResult = await res.json()
      setRowResult(data)
      setOverlayTs(Date.now())
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
  }, [sessionId])

  useEffect(() => {
    if (!sessionId) return

    const fetchSession = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (res.ok) {
          const info = await res.json()
          if (info.row_result) {
            setRowResult(info.row_result)
            setOverlayTs(Date.now())
            return
          }
        }
      } catch (e) {
        console.error('Failed to fetch session info:', e)
      }
      // No cached result — run auto
      runAutoDetection()
    }

    fetchSession()
  }, [sessionId, runAutoDetection])

  /** Stores the reviewer's verdict (plus optional notes) for the detected rows. */
  const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
    if (!sessionId) return
    const gt: RowGroundTruth = {
      is_correct: isCorrect,
      notes: gtNotes || undefined,
    }
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/rows`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch only rejects on network errors; an HTTP error status must be
      // checked explicitly, otherwise a failed save would be shown as saved.
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Ground Truth konnte nicht gespeichert werden')
      }
      setGtSaved(true)
    } catch (e) {
      console.error('Ground truth save failed:', e)
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    }
  }, [sessionId, gtNotes])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">📏</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 4: Zeilenerkennung
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritte 1-3 abschliessen.
        </p>
      </div>
    )
  }

  const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/rows-overlay`
  const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`

  // Badge colours per row type; unknown types fall back at the usage site.
  const rowTypeColors: Record<string, string> = {
    header: 'bg-gray-200 dark:bg-gray-600 text-gray-700 dark:text-gray-300',
    content: 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300',
    footer: 'bg-gray-200 dark:bg-gray-600 text-gray-700 dark:text-gray-300',
  }

  return (
    <div className="space-y-4">
      {/* Loading */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Zeilenerkennung laeuft...
        </div>
      )}

      {/* Images: overlay vs clean */}
      <div className="grid grid-cols-2 gap-4">
        <div>
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
            Mit Zeilen-Overlay
          </div>
          <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
            {rowResult ? (
              // eslint-disable-next-line @next/next/no-img-element
              <img
                src={`${overlayUrl}?t=${overlayTs}`}
                alt="Zeilen-Overlay"
                className="w-full h-auto"
              />
            ) : (
              <div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
                {detecting ? 'Erkenne Zeilen...' : 'Keine Daten'}
              </div>
            )}
          </div>
        </div>
        <div>
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
            Entzerrtes Bild
          </div>
          <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              src={dewarpedUrl}
              alt="Entzerrt"
              className="w-full h-auto"
            />
          </div>
        </div>
      </div>

      {/* Row summary */}
      {rowResult && (
        <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
          <div className="flex items-center justify-between">
            <h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
              Ergebnis: {rowResult.total_rows} Zeilen erkannt
            </h4>
            <span className="text-xs text-gray-400">
              {rowResult.duration_seconds}s
            </span>
          </div>

          {/* Type summary badges */}
          <div className="flex gap-2">
            {Object.entries(rowResult.summary).map(([type, count]) => (
              <span
                key={type}
                className={`px-2 py-0.5 rounded text-xs font-medium ${rowTypeColors[type] || 'bg-gray-100 text-gray-600'}`}
              >
                {type}: {count}
              </span>
            ))}
          </div>

          {/* Row list */}
          <div className="max-h-64 overflow-y-auto space-y-1">
            {rowResult.rows.map((row) => (
              <div
                key={row.index}
                className={`flex items-center gap-3 px-3 py-1.5 rounded text-xs font-mono ${
                  row.row_type === 'header' || row.row_type === 'footer'
                    ? 'bg-gray-50 dark:bg-gray-700/50 text-gray-500'
                    : 'text-gray-600 dark:text-gray-400'
                }`}
              >
                <span className="w-8 text-right text-gray-400">R{row.index}</span>
                <span className={`px-1.5 py-0.5 rounded text-[10px] uppercase font-semibold ${rowTypeColors[row.row_type] || ''}`}>
                  {row.row_type}
                </span>
                <span>y={row.y}</span>
                <span>h={row.height}px</span>
                <span>{row.word_count} Woerter</span>
                {row.gap_before > 0 && (
                  <span className="text-gray-400">gap={row.gap_before}px</span>
                )}
              </div>
            ))}
          </div>
        </div>
      )}

      {/* Controls */}
      {rowResult && (
        <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
          <div className="flex items-center gap-3">
            <button
              onClick={() => runAutoDetection()}
              disabled={detecting}
              className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-50"
            >
              Erneut erkennen
            </button>

            <div className="flex-1" />

            {/* Ground truth */}
            {!gtSaved ? (
              <>
                <input
                  type="text"
                  placeholder="Notizen (optional)"
                  value={gtNotes}
                  onChange={(e) => setGtNotes(e.target.value)}
                  className="px-2 py-1 text-xs border rounded dark:bg-gray-700 dark:border-gray-600 w-48"
                />
                <button
                  onClick={() => handleGroundTruth(true)}
                  className="px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700"
                >
                  Korrekt
                </button>
                <button
                  onClick={() => handleGroundTruth(false)}
                  className="px-3 py-1.5 text-xs bg-red-600 text-white rounded-lg hover:bg-red-700"
                >
                  Fehlerhaft
                </button>
              </>
            ) : (
              <span className="text-xs text-green-600 dark:text-green-400">
                Ground Truth gespeichert
              </span>
            )}

            <button
              onClick={onNext}
              className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium"
            >
              Weiter
            </button>
          </div>
        </div>
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
619
admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx
Normal file
619
admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx
Normal file
@@ -0,0 +1,619 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { ExcludeRegion, StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepStructureDetectionProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
const COLOR_HEX: Record<string, string> = {
|
||||
red: '#dc2626',
|
||||
orange: '#ea580c',
|
||||
yellow: '#ca8a04',
|
||||
green: '#16a34a',
|
||||
blue: '#2563eb',
|
||||
purple: '#9333ea',
|
||||
}
|
||||
|
||||
/**
 * Convert a mouse event on the image container to image-pixel coordinates.
 * The image uses object-contain inside an A4-ratio container, so we need
 * to account for letterboxing.
 *
 * @param e           Mouse event; only `clientX` / `clientY` are read.
 * @param containerEl Element whose bounding rect defines the container box.
 * @param imgWidth    Image width in image pixels.
 * @param imgHeight   Image height in image pixels.
 * @returns Rounded image-pixel coordinates, or `null` when the pointer is
 *          outside the rendered image area or when the container or image
 *          has no positive size (no meaningful mapping exists then).
 */
function mouseToImageCoords(
  e: React.MouseEvent,
  containerEl: HTMLElement,
  imgWidth: number,
  imgHeight: number,
): { x: number; y: number } | null {
  const rect = containerEl.getBoundingClientRect()
  const containerW = rect.width
  const containerH = rect.height

  // Without positive sizes on both sides the scale below becomes 0, Infinity
  // or NaN and the result would be NaN coordinates. The negated form also
  // rejects NaN inputs.
  if (!(imgWidth > 0 && imgHeight > 0 && containerW > 0 && containerH > 0)) {
    return null
  }

  // object-contain: image is scaled to fit, centered
  const scaleX = containerW / imgWidth
  const scaleY = containerH / imgHeight
  const scale = Math.min(scaleX, scaleY)

  const renderedW = imgWidth * scale
  const renderedH = imgHeight * scale
  const offsetX = (containerW - renderedW) / 2
  const offsetY = (containerH - renderedH) / 2

  // Pointer position relative to the top-left corner of the rendered image.
  const relX = e.clientX - rect.left - offsetX
  const relY = e.clientY - rect.top - offsetY

  // Pointer is in the letterbox bars, not on the image.
  if (relX < 0 || relY < 0 || relX > renderedW || relY > renderedH) {
    return null
  }

  return {
    x: Math.round(relX / scale),
    y: Math.round(relY / scale),
  }
}
|
||||
|
||||
/**
 * Map a rectangle given in image pixels to CSS percentages of the container,
 * for absolutely positioned overlays.
 *
 * The image is treated as scaled uniformly to fit the container and centred
 * in it, so the unused margin on either axis is added to the offsets.
 *
 * @param region     Rectangle in image-pixel coordinates.
 * @param containerW Container width in CSS pixels.
 * @param containerH Container height in CSS pixels.
 * @param imgWidth   Image width in image pixels.
 * @param imgHeight  Image height in image pixels.
 * @returns `left` / `top` / `width` / `height` as percentage strings.
 */
function imageToOverlayPct(
  region: { x: number; y: number; w: number; h: number },
  containerW: number,
  containerH: number,
  imgWidth: number,
  imgHeight: number,
): { left: string; top: string; width: string; height: string } {
  // Uniform fit: the tighter axis decides the scale factor.
  const fit = Math.min(containerW / imgWidth, containerH / imgHeight)

  // Margin on each side once the scaled image is centred.
  const marginX = (containerW - imgWidth * fit) / 2
  const marginY = (containerH - imgHeight * fit) / 2

  const asPercent = (px: number, total: number): string => `${(px / total) * 100}%`

  return {
    left: asPercent(marginX + region.x * fit, containerW),
    top: asPercent(marginY + region.y * fit, containerH),
    width: asPercent(region.w * fit, containerW),
    height: asPercent(region.h * fit, containerH),
  }
}
|
||||
|
||||
export function StepStructureDetection({ sessionId, onNext }: StepStructureDetectionProps) {
|
||||
const [result, setResult] = useState<StructureResult | null>(null)
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [hasRun, setHasRun] = useState(false)
|
||||
const [overlayTs, setOverlayTs] = useState(0)
|
||||
|
||||
// Exclude region drawing state
|
||||
const [excludeRegions, setExcludeRegions] = useState<ExcludeRegion[]>([])
|
||||
const [drawing, setDrawing] = useState(false)
|
||||
const [drawStart, setDrawStart] = useState<{ x: number; y: number } | null>(null)
|
||||
const [drawCurrent, setDrawCurrent] = useState<{ x: number; y: number } | null>(null)
|
||||
const [saving, setSaving] = useState(false)
|
||||
const [drawMode, setDrawMode] = useState(false)
|
||||
|
||||
const containerRef = useRef<HTMLDivElement>(null)
|
||||
const [containerSize, setContainerSize] = useState({ w: 0, h: 0 })
|
||||
|
||||
// Track container size for overlay positioning
|
||||
useEffect(() => {
|
||||
const el = containerRef.current
|
||||
if (!el) return
|
||||
const obs = new ResizeObserver((entries) => {
|
||||
for (const entry of entries) {
|
||||
setContainerSize({ w: entry.contentRect.width, h: entry.contentRect.height })
|
||||
}
|
||||
})
|
||||
obs.observe(el)
|
||||
return () => obs.disconnect()
|
||||
}, [])
|
||||
|
||||
// Auto-trigger detection on mount
|
||||
useEffect(() => {
|
||||
if (!sessionId || hasRun) return
|
||||
setHasRun(true)
|
||||
|
||||
const runDetection = async () => {
|
||||
setDetecting(true)
|
||||
setError(null)
|
||||
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
|
||||
method: 'POST',
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error('Strukturerkennung fehlgeschlagen')
|
||||
}
|
||||
|
||||
const data = await res.json()
|
||||
setResult(data)
|
||||
setExcludeRegions(data.exclude_regions || [])
|
||||
setOverlayTs(Date.now())
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setDetecting(false)
|
||||
}
|
||||
}
|
||||
|
||||
runDetection()
|
||||
}, [sessionId, hasRun])
|
||||
|
||||
const handleRerun = async () => {
|
||||
if (!sessionId) return
|
||||
setDetecting(true)
|
||||
setError(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (!res.ok) throw new Error('Erneute Erkennung fehlgeschlagen')
|
||||
const data = await res.json()
|
||||
setResult(data)
|
||||
setExcludeRegions(data.exclude_regions || [])
|
||||
setOverlayTs(Date.now())
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setDetecting(false)
|
||||
}
|
||||
}
|
||||
|
||||
// Save exclude regions to backend
|
||||
const saveExcludeRegions = useCallback(async (regions: ExcludeRegion[]) => {
|
||||
if (!sessionId) return
|
||||
setSaving(true)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/exclude-regions`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ regions }),
|
||||
})
|
||||
if (!res.ok) throw new Error('Speichern fehlgeschlagen')
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Speichern fehlgeschlagen')
|
||||
} finally {
|
||||
setSaving(false)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
// Mouse handlers for drawing exclude rectangles
|
||||
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
||||
if (!drawMode || !containerRef.current || !result) return
|
||||
const coords = mouseToImageCoords(e, containerRef.current, result.image_width, result.image_height)
|
||||
if (coords) {
|
||||
setDrawing(true)
|
||||
setDrawStart(coords)
|
||||
setDrawCurrent(coords)
|
||||
}
|
||||
}, [drawMode, result])
|
||||
|
||||
const handleMouseMove = useCallback((e: React.MouseEvent) => {
|
||||
if (!drawing || !containerRef.current || !result) return
|
||||
const coords = mouseToImageCoords(e, containerRef.current, result.image_width, result.image_height)
|
||||
if (coords) {
|
||||
setDrawCurrent(coords)
|
||||
}
|
||||
}, [drawing, result])
|
||||
|
||||
const handleMouseUp = useCallback(() => {
|
||||
if (!drawing || !drawStart || !drawCurrent) {
|
||||
setDrawing(false)
|
||||
return
|
||||
}
|
||||
|
||||
const x = Math.min(drawStart.x, drawCurrent.x)
|
||||
const y = Math.min(drawStart.y, drawCurrent.y)
|
||||
const w = Math.abs(drawCurrent.x - drawStart.x)
|
||||
const h = Math.abs(drawCurrent.y - drawStart.y)
|
||||
|
||||
// Minimum size to avoid accidental clicks
|
||||
if (w > 10 && h > 10) {
|
||||
const newRegion: ExcludeRegion = { x, y, w, h, label: `Bereich ${excludeRegions.length + 1}` }
|
||||
const updated = [...excludeRegions, newRegion]
|
||||
setExcludeRegions(updated)
|
||||
saveExcludeRegions(updated)
|
||||
}
|
||||
|
||||
setDrawing(false)
|
||||
setDrawStart(null)
|
||||
setDrawCurrent(null)
|
||||
}, [drawing, drawStart, drawCurrent, excludeRegions, saveExcludeRegions])
|
||||
|
||||
const handleDeleteRegion = useCallback(async (index: number) => {
|
||||
if (!sessionId) return
|
||||
setSaving(true)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/exclude-regions/${index}`, {
|
||||
method: 'DELETE',
|
||||
})
|
||||
if (!res.ok) throw new Error('Loeschen fehlgeschlagen')
|
||||
const updated = excludeRegions.filter((_, i) => i !== index)
|
||||
setExcludeRegions(updated)
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Loeschen fehlgeschlagen')
|
||||
} finally {
|
||||
setSaving(false)
|
||||
}
|
||||
}, [sessionId, excludeRegions])
|
||||
|
||||
if (!sessionId) {
|
||||
return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
|
||||
}
|
||||
|
||||
const croppedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/structure-overlay${overlayTs ? `?t=${overlayTs}` : ''}`
|
||||
|
||||
// Current drag rectangle in image coords
|
||||
const dragRect = drawing && drawStart && drawCurrent
|
||||
? {
|
||||
x: Math.min(drawStart.x, drawCurrent.x),
|
||||
y: Math.min(drawStart.y, drawCurrent.y),
|
||||
w: Math.abs(drawCurrent.x - drawStart.x),
|
||||
h: Math.abs(drawCurrent.y - drawStart.y),
|
||||
}
|
||||
: null
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Loading indicator */}
|
||||
{detecting && (
|
||||
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||
Dokumentstruktur wird analysiert...
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Draw mode toggle */}
|
||||
{result && (
|
||||
<div className="flex items-center gap-3">
|
||||
<button
|
||||
onClick={() => setDrawMode(!drawMode)}
|
||||
className={`px-4 py-2 text-sm rounded-lg font-medium transition-colors ${
|
||||
drawMode
|
||||
? 'bg-red-600 text-white hover:bg-red-700'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-700 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||
}`}
|
||||
>
|
||||
{drawMode ? 'Zeichnen beenden' : 'Ausschlussbereich zeichnen'}
|
||||
</button>
|
||||
{drawMode && (
|
||||
<span className="text-xs text-red-600 dark:text-red-400">
|
||||
Rechteck auf dem Bild zeichnen um Bereiche von der OCR-Erkennung auszuschliessen
|
||||
</span>
|
||||
)}
|
||||
{saving && (
|
||||
<span className="text-xs text-gray-400">Speichern...</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Two-column image comparison */}
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
|
||||
{/* Left: Original document with exclude region drawing */}
|
||||
<div className="space-y-2">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
|
||||
Original {excludeRegions.length > 0 && `(${excludeRegions.length} Ausschlussbereich${excludeRegions.length !== 1 ? 'e' : ''})`}
|
||||
</div>
|
||||
<div
|
||||
ref={containerRef}
|
||||
className={`relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden ${
|
||||
drawMode ? 'cursor-crosshair' : ''
|
||||
}`}
|
||||
style={{ aspectRatio: '210/297' }}
|
||||
onMouseDown={handleMouseDown}
|
||||
onMouseMove={handleMouseMove}
|
||||
onMouseUp={handleMouseUp}
|
||||
onMouseLeave={() => {
|
||||
if (drawing) {
|
||||
handleMouseUp()
|
||||
}
|
||||
}}
|
||||
>
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={croppedUrl}
|
||||
alt="Originaldokument"
|
||||
className="w-full h-full object-contain pointer-events-none"
|
||||
draggable={false}
|
||||
onError={(e) => {
|
||||
(e.target as HTMLImageElement).style.display = 'none'
|
||||
}}
|
||||
/>
|
||||
|
||||
{/* Saved exclude regions overlay */}
|
||||
{result && containerSize.w > 0 && excludeRegions.map((region, i) => {
|
||||
const pos = imageToOverlayPct(region, containerSize.w, containerSize.h, result.image_width, result.image_height)
|
||||
return (
|
||||
<div
|
||||
key={i}
|
||||
className="absolute border-2 border-red-500 bg-red-500/20 group"
|
||||
style={pos}
|
||||
>
|
||||
<div className="absolute -top-5 left-0 flex items-center gap-1 opacity-0 group-hover:opacity-100 transition-opacity">
|
||||
<span className="text-[10px] bg-red-600 text-white px-1 rounded whitespace-nowrap">
|
||||
{region.label || `Bereich ${i + 1}`}
|
||||
</span>
|
||||
<button
|
||||
onClick={(e) => { e.stopPropagation(); handleDeleteRegion(i) }}
|
||||
className="w-4 h-4 bg-red-600 text-white rounded-full text-[10px] flex items-center justify-center hover:bg-red-700"
|
||||
>
|
||||
x
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
|
||||
{/* Current drag rectangle */}
|
||||
{dragRect && result && containerSize.w > 0 && (() => {
|
||||
const pos = imageToOverlayPct(dragRect, containerSize.w, containerSize.h, result.image_width, result.image_height)
|
||||
return (
|
||||
<div
|
||||
className="absolute border-2 border-red-500 border-dashed bg-red-500/15"
|
||||
style={pos}
|
||||
/>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right: Structure overlay */}
|
||||
<div className="space-y-2">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
|
||||
Erkannte Struktur
|
||||
</div>
|
||||
<div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={overlayUrl}
|
||||
alt="Strukturerkennung"
|
||||
className="w-full h-full object-contain"
|
||||
onError={(e) => {
|
||||
(e.target as HTMLImageElement).style.display = 'none'
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Exclude regions list */}
|
||||
{excludeRegions.length > 0 && (
|
||||
<div className="bg-red-50 dark:bg-red-900/10 rounded-lg border border-red-200 dark:border-red-800 p-3">
|
||||
<h4 className="text-xs font-medium text-red-700 dark:text-red-400 mb-2">
|
||||
Ausschlussbereiche ({excludeRegions.length}) — Woerter in diesen Bereichen werden nicht erkannt
|
||||
</h4>
|
||||
<div className="space-y-1">
|
||||
{excludeRegions.map((region, i) => (
|
||||
<div key={i} className="flex items-center gap-3 text-xs">
|
||||
<span className="w-3 h-3 rounded-sm flex-shrink-0 bg-red-500/30 border border-red-500" />
|
||||
<span className="text-red-700 dark:text-red-400 font-medium">
|
||||
{region.label || `Bereich ${i + 1}`}
|
||||
</span>
|
||||
<span className="font-mono text-red-600/70 dark:text-red-400/70">
|
||||
{region.w}x{region.h}px @ ({region.x}, {region.y})
|
||||
</span>
|
||||
<button
|
||||
onClick={() => handleDeleteRegion(i)}
|
||||
className="ml-auto text-red-500 hover:text-red-700 dark:hover:text-red-300"
|
||||
>
|
||||
Entfernen
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Result info */}
|
||||
{result && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
{/* Summary badges */}
|
||||
<div className="flex flex-wrap items-center gap-3 text-sm">
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-teal-50 dark:bg-teal-900/20 text-teal-700 dark:text-teal-400 text-xs font-medium">
|
||||
{result.zones.length} Zone(n)
|
||||
</span>
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
|
||||
{result.boxes.length} Box(en)
|
||||
</span>
|
||||
{result.graphics && result.graphics.length > 0 && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-400 text-xs font-medium">
|
||||
{result.graphics.length} Grafik(en)
|
||||
</span>
|
||||
)}
|
||||
{result.has_words && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-400 text-xs font-medium">
|
||||
{result.word_count} Woerter
|
||||
</span>
|
||||
)}
|
||||
{excludeRegions.length > 0 && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
|
||||
{excludeRegions.length} Ausschluss
|
||||
</span>
|
||||
)}
|
||||
{(result.border_ghosts_removed ?? 0) > 0 && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
|
||||
{result.border_ghosts_removed} Rahmenlinien entfernt
|
||||
</span>
|
||||
)}
|
||||
<span className="text-gray-400 text-xs ml-auto">
|
||||
{result.image_width}x{result.image_height}px | {result.duration_seconds}s
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Boxes detail */}
|
||||
{result.boxes.length > 0 && (
|
||||
<div>
|
||||
<h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Erkannte Boxen</h4>
|
||||
<div className="space-y-1.5">
|
||||
{result.boxes.map((box, i) => (
|
||||
<div key={i} className="flex items-center gap-3 text-xs">
|
||||
<span
|
||||
className="w-3 h-3 rounded-sm flex-shrink-0 border border-gray-300 dark:border-gray-600"
|
||||
style={{ backgroundColor: box.bg_color_hex || '#6b7280' }}
|
||||
/>
|
||||
<span className="text-gray-600 dark:text-gray-400">
|
||||
Box {i + 1}:
|
||||
</span>
|
||||
<span className="font-mono text-gray-500">
|
||||
{box.w}x{box.h}px @ ({box.x}, {box.y})
|
||||
</span>
|
||||
{box.bg_color_name && box.bg_color_name !== 'unknown' && box.bg_color_name !== 'white' && (
|
||||
<span className="px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-500">
|
||||
{box.bg_color_name}
|
||||
</span>
|
||||
)}
|
||||
{box.border_thickness > 0 && (
|
||||
<span className="text-gray-400">
|
||||
Rahmen: {box.border_thickness}px
|
||||
</span>
|
||||
)}
|
||||
<span className="text-gray-400">
|
||||
{Math.round(box.confidence * 100)}%
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Zones detail */}
|
||||
<div>
|
||||
<h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Seitenzonen</h4>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{result.zones.map((zone) => (
|
||||
<span
|
||||
key={zone.index}
|
||||
className={`inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] font-medium ${
|
||||
zone.zone_type === 'box'
|
||||
? 'bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-800'
|
||||
: 'bg-gray-50 dark:bg-gray-800 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-700'
|
||||
}`}
|
||||
>
|
||||
{zone.zone_type === 'box' ? 'Box' : 'Inhalt'} {zone.index}
|
||||
<span className="text-[10px] font-normal opacity-70">
|
||||
({zone.w}x{zone.h})
|
||||
</span>
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Graphics / visual elements */}
|
||||
{result.graphics && result.graphics.length > 0 && (
|
||||
<div>
|
||||
<h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
|
||||
Graphische Elemente ({result.graphics.length})
|
||||
</h4>
|
||||
{/* Summary by shape */}
|
||||
{(() => {
|
||||
const shapeCounts: Record<string, number> = {}
|
||||
for (const g of result.graphics) {
|
||||
shapeCounts[g.shape] = (shapeCounts[g.shape] || 0) + 1
|
||||
}
|
||||
return (
|
||||
<div className="flex flex-wrap gap-2 mb-2">
|
||||
{Object.entries(shapeCounts)
|
||||
.sort(([, a], [, b]) => b - a)
|
||||
.map(([shape, count]) => (
|
||||
<span
|
||||
key={shape}
|
||||
className="inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-300 border border-purple-200 dark:border-purple-800"
|
||||
>
|
||||
{shape === 'arrow' ? '→' : shape === 'circle' ? '●' : shape === 'line' ? '─' : shape === 'exclamation' ? '❗' : shape === 'dot' ? '•' : shape === 'illustration' ? '🖼' : '◆'}
|
||||
{' '}{shape} <span className="font-semibold">x{count}</span>
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
)
|
||||
})()}
|
||||
{/* Individual graphics list */}
|
||||
<div className="space-y-1.5 max-h-40 overflow-y-auto">
|
||||
{result.graphics.map((g, i) => (
|
||||
<div key={i} className="flex items-center gap-3 text-xs">
|
||||
<span
|
||||
className="w-3 h-3 rounded-full flex-shrink-0 border border-gray-300 dark:border-gray-600"
|
||||
style={{ backgroundColor: g.color_hex || '#6b7280' }}
|
||||
/>
|
||||
<span className="text-gray-600 dark:text-gray-400 font-medium min-w-[60px]">
|
||||
{g.shape}
|
||||
</span>
|
||||
<span className="font-mono text-gray-500">
|
||||
{g.w}x{g.h}px @ ({g.x}, {g.y})
|
||||
</span>
|
||||
<span className="text-gray-400">
|
||||
{g.color_name}
|
||||
</span>
|
||||
<span className="text-gray-400">
|
||||
{Math.round(g.confidence * 100)}%
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Color regions */}
|
||||
{Object.keys(result.color_pixel_counts).length > 0 && (
|
||||
<div>
|
||||
<h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Erkannte Farben</h4>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{Object.entries(result.color_pixel_counts)
|
||||
.sort(([, a], [, b]) => b - a)
|
||||
.map(([name, count]) => (
|
||||
<span key={name} className="inline-flex items-center gap-1.5 px-2 py-1 rounded text-[11px] bg-gray-50 dark:bg-gray-800 border border-gray-200 dark:border-gray-700">
|
||||
<span
|
||||
className="w-2.5 h-2.5 rounded-full"
|
||||
style={{ backgroundColor: COLOR_HEX[name] || '#6b7280' }}
|
||||
/>
|
||||
<span className="text-gray-600 dark:text-gray-400">{name}</span>
|
||||
<span className="text-gray-400 text-[10px]">{count.toLocaleString()}px</span>
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Action buttons */}
|
||||
{result && (
|
||||
<div className="flex justify-between">
|
||||
<button
|
||||
onClick={handleRerun}
|
||||
disabled={detecting}
|
||||
className="px-4 py-2 text-sm text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 transition-colors disabled:opacity-50"
|
||||
>
|
||||
Erneut erkennen
|
||||
</button>
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
|
||||
>
|
||||
Weiter →
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
936
admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
Normal file
936
admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
Normal file
@@ -0,0 +1,936 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/**
 * Renders a string so that every "\n" becomes a <br /> element.
 * Empty text is rendered as a muted em-dash placeholder instead.
 */
function MultilineText({ text }: { text: string }) {
  if (!text) {
    return <span className="text-gray-300 dark:text-gray-600">—</span>
  }

  const segments = text.split('\n')

  // Single-line text is emitted as-is, without wrapper spans.
  if (segments.length === 1) {
    return <>{text}</>
  }

  const lastIdx = segments.length - 1
  return (
    <>
      {segments.map((segment, idx) => (
        <span key={idx}>
          {segment}
          {idx < lastIdx && <br />}
        </span>
      ))}
    </>
  )
}
|
||||
|
||||
/**
 * Maps a column type identifier to a human-readable table header.
 *
 * @param colType Column type identifier, e.g. "column_en" or "page_ref".
 * @returns The fixed label for a known type; for any other value, the
 *          identifier itself with the first "column_" occurrence removed.
 */
function colTypeLabel(colType: string): string {
  const labels: Record<string, string> = {
    column_en: 'English',
    column_de: 'Deutsch',
    column_example: 'Example',
    column_text: 'Text',
    column_marker: 'Marker',
    page_ref: 'Seite',
  }

  // Own-property check: a bare `labels[colType]` lookup would resolve
  // inherited keys such as "constructor" or "toString" to functions and
  // return a non-string value from a function typed as returning string.
  if (Object.prototype.hasOwnProperty.call(labels, colType)) {
    return labels[colType]
  }
  return colType.replace('column_', '')
}
|
||||
|
||||
/**
 * Maps a column type identifier to the Tailwind text-color classes used
 * for that column's header and badge.
 *
 * @param colType Column type identifier, e.g. "column_en".
 * @returns The color classes for a known type, otherwise a neutral gray.
 */
function colTypeColor(colType: string): string {
  const colors: Record<string, string> = {
    column_en: 'text-blue-600 dark:text-blue-400',
    column_de: 'text-green-600 dark:text-green-400',
    column_example: 'text-orange-600 dark:text-orange-400',
    column_text: 'text-purple-600 dark:text-purple-400',
    column_marker: 'text-gray-500 dark:text-gray-400',
  }

  // Own-property check: a bare `colors[colType]` lookup would resolve
  // inherited keys such as "constructor" to functions, which would then
  // be interpolated into a className string by the callers.
  if (Object.prototype.hasOwnProperty.call(colors, colType)) {
    return colors[colType]
  }
  return 'text-gray-600 dark:text-gray-400'
}
|
||||
|
||||
/** Props accepted by the StepWordRecognition wizard step. */
interface StepWordRecognitionProps {
  /**
   * Id of the OCR pipeline session to operate on. While it is null the
   * step only renders a placeholder and does not start any detection.
   */
  sessionId: string | null

  /** Callback without arguments — presumably advances the wizard; confirm against the caller. */
  onNext: () => void

  /** Callback taking a step number — presumably jumps to that wizard step; confirm against the caller. */
  goToStep: (step: number) => void

  /**
   * Skip _heal_row_gaps in cell grid (better overlay positioning).
   * When true, `skip_heal_gaps=true` is added to the words request.
   */
  skipHealGaps?: boolean
}
|
||||
|
||||
export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps = false }: StepWordRecognitionProps) {
|
||||
const [gridResult, setGridResult] = useState<GridResult | null>(null)
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [gtNotes, setGtNotes] = useState('')
|
||||
const [gtSaved, setGtSaved] = useState(false)
|
||||
|
||||
// Step-through labeling state
|
||||
const [activeIndex, setActiveIndex] = useState(0)
|
||||
const [editedEntries, setEditedEntries] = useState<WordEntry[]>([])
|
||||
const [editedCells, setEditedCells] = useState<GridCell[]>([])
|
||||
const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
|
||||
const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid' | 'paddle'>('auto')
|
||||
const [usedEngine, setUsedEngine] = useState<string>('')
|
||||
const [pronunciation, setPronunciation] = useState<'british' | 'american'>('british')
|
||||
const [gridMethod, setGridMethod] = useState<'v2' | 'words_first'>('v2')
|
||||
|
||||
// Streaming progress state
|
||||
const [streamProgress, setStreamProgress] = useState<{ current: number; total: number } | null>(null)
|
||||
|
||||
const enRef = useRef<HTMLInputElement>(null)
|
||||
const tableEndRef = useRef<HTMLDivElement>(null)
|
||||
|
||||
const isVocab = gridResult?.layout === 'vocab'
|
||||
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
// Always run fresh detection — word-lookup is fast (~0.03s)
|
||||
// and avoids stale cached results from previous pipeline versions.
|
||||
runAutoDetection()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
const applyGridResult = (data: GridResult) => {
|
||||
setGridResult(data)
|
||||
setUsedEngine(data.ocr_engine || '')
|
||||
if (data.layout === 'vocab' && data.entries) {
|
||||
initEntries(data.entries)
|
||||
}
|
||||
if (data.cells) {
|
||||
setEditedCells(data.cells.map(c => ({ ...c, status: c.status || 'pending' })))
|
||||
}
|
||||
}
|
||||
|
||||
const initEntries = (entries: WordEntry[]) => {
|
||||
setEditedEntries(entries.map(e => ({ ...e, status: e.status || 'pending' })))
|
||||
setActiveIndex(0)
|
||||
}
|
||||
|
||||
const runAutoDetection = useCallback(async (engine?: string) => {
|
||||
if (!sessionId) return
|
||||
const eng = engine || ocrEngine
|
||||
setDetecting(true)
|
||||
setError(null)
|
||||
setStreamProgress(null)
|
||||
setEditedCells([])
|
||||
setEditedEntries([])
|
||||
setGridResult(null)
|
||||
|
||||
try {
|
||||
// PP-OCRv5 forces words_first on the backend, so align frontend accordingly
|
||||
const effectiveGridMethod = eng === 'paddle' ? 'words_first' : gridMethod
|
||||
const useStream = effectiveGridMethod === 'v2'
|
||||
|
||||
// Retry once if initial request fails (e.g. after container restart,
|
||||
// session cache may not be warm yet when navigating via wizard)
|
||||
let res: Response | null = null
|
||||
for (let attempt = 0; attempt < 2; attempt++) {
|
||||
res = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=${useStream ? 'true' : 'false'}&engine=${eng}&pronunciation=${pronunciation}${skipHealGaps ? '&skip_heal_gaps=true' : ''}&grid_method=${effectiveGridMethod}`,
|
||||
{ method: 'POST' },
|
||||
)
|
||||
if (res.ok) break
|
||||
if (attempt === 0 && (res.status === 400 || res.status === 404)) {
|
||||
// Wait briefly for cache to warm up, then retry
|
||||
await new Promise(r => setTimeout(r, 2000))
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if (!res || !res.ok) {
|
||||
const err = await res?.json().catch(() => ({ detail: res?.statusText })) || { detail: 'Worterkennung fehlgeschlagen' }
|
||||
throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
|
||||
}
|
||||
|
||||
// words_first / pp-ocrv5 returns plain JSON (no streaming)
|
||||
if (!useStream) {
|
||||
const data = await res.json() as GridResult
|
||||
applyGridResult(data)
|
||||
return
|
||||
}
|
||||
|
||||
const reader = res.body!.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
let buffer = ''
|
||||
let streamLayout: string | null = null
|
||||
let streamColumnsUsed: GridResult['columns_used'] = []
|
||||
let streamGridShape: GridResult['grid_shape'] | null = null
|
||||
let streamCells: GridCell[] = []
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
|
||||
// Parse SSE events (separated by \n\n)
|
||||
while (buffer.includes('\n\n')) {
|
||||
const idx = buffer.indexOf('\n\n')
|
||||
const chunk = buffer.slice(0, idx).trim()
|
||||
buffer = buffer.slice(idx + 2)
|
||||
|
||||
if (!chunk.startsWith('data: ')) continue
|
||||
const dataStr = chunk.slice(6) // strip "data: "
|
||||
|
||||
let event: any
|
||||
try {
|
||||
event = JSON.parse(dataStr)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
if (event.type === 'meta') {
|
||||
streamLayout = event.layout || 'generic'
|
||||
streamGridShape = event.grid_shape || null
|
||||
// Show partial grid result so UI renders structure
|
||||
setGridResult(prev => ({
|
||||
...prev,
|
||||
layout: event.layout || 'generic',
|
||||
grid_shape: event.grid_shape,
|
||||
columns_used: [],
|
||||
cells: [],
|
||||
summary: { total_cells: event.grid_shape?.total_cells || 0, non_empty_cells: 0, low_confidence: 0 },
|
||||
duration_seconds: 0,
|
||||
ocr_engine: '',
|
||||
} as GridResult))
|
||||
}
|
||||
|
||||
if (event.type === 'columns') {
|
||||
streamColumnsUsed = event.columns_used || []
|
||||
setGridResult(prev => prev ? { ...prev, columns_used: streamColumnsUsed } : prev)
|
||||
}
|
||||
|
||||
if (event.type === 'cell') {
|
||||
const cell: GridCell = { ...event.cell, status: 'pending' }
|
||||
streamCells = [...streamCells, cell]
|
||||
setEditedCells(streamCells)
|
||||
setStreamProgress(event.progress)
|
||||
// Auto-scroll table to bottom
|
||||
setTimeout(() => tableEndRef.current?.scrollIntoView({ behavior: 'smooth', block: 'nearest' }), 16)
|
||||
}
|
||||
|
||||
if (event.type === 'complete') {
|
||||
// Build final GridResult
|
||||
const finalResult: GridResult = {
|
||||
cells: streamCells,
|
||||
grid_shape: streamGridShape || { rows: 0, cols: 0, total_cells: streamCells.length },
|
||||
columns_used: streamColumnsUsed,
|
||||
layout: streamLayout || 'generic',
|
||||
image_width: 0,
|
||||
image_height: 0,
|
||||
duration_seconds: event.duration_seconds || 0,
|
||||
ocr_engine: event.ocr_engine || '',
|
||||
summary: event.summary || {},
|
||||
}
|
||||
|
||||
// If vocab: apply post-processed entries from complete event
|
||||
if (event.vocab_entries) {
|
||||
finalResult.entries = event.vocab_entries
|
||||
finalResult.vocab_entries = event.vocab_entries
|
||||
finalResult.entry_count = event.vocab_entries.length
|
||||
}
|
||||
|
||||
applyGridResult(finalResult)
|
||||
setUsedEngine(event.ocr_engine || '')
|
||||
setStreamProgress(null)
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setDetecting(false)
|
||||
}
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId, ocrEngine, pronunciation, gridMethod])
|
||||
|
||||
const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
|
||||
if (!sessionId) return
|
||||
const gt: WordGroundTruth = {
|
||||
is_correct: isCorrect,
|
||||
corrected_entries: isCorrect ? undefined : (isVocab ? editedEntries : undefined),
|
||||
notes: gtNotes || undefined,
|
||||
}
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/words`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(gt),
|
||||
})
|
||||
setGtSaved(true)
|
||||
} catch (e) {
|
||||
console.error('Ground truth save failed:', e)
|
||||
}
|
||||
}, [sessionId, gtNotes, editedEntries, isVocab])
|
||||
|
||||
// Vocab mode: update entry field
|
||||
const updateEntry = (index: number, field: 'english' | 'german' | 'example', value: string) => {
|
||||
setEditedEntries(prev => prev.map((e, i) =>
|
||||
i === index ? { ...e, [field]: value, status: 'edited' as const } : e
|
||||
))
|
||||
}
|
||||
|
||||
// Generic mode: update cell text
|
||||
const updateCell = (cellId: string, value: string) => {
|
||||
setEditedCells(prev => prev.map(c =>
|
||||
c.cell_id === cellId ? { ...c, text: value, status: 'edited' as const } : c
|
||||
))
|
||||
}
|
||||
|
||||
// Step-through: confirm current row (always cell-based)
|
||||
const confirmEntry = () => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
const cellIds = new Set(rowCells.map(c => c.cell_id))
|
||||
setEditedCells(prev => prev.map(c =>
|
||||
cellIds.has(c.cell_id) ? { ...c, status: c.status === 'edited' ? 'edited' : 'confirmed' } : c
|
||||
))
|
||||
const maxIdx = getUniqueRowCount() - 1
|
||||
if (activeIndex < maxIdx) {
|
||||
setActiveIndex(activeIndex + 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Step-through: skip current row
|
||||
const skipEntry = () => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
const cellIds = new Set(rowCells.map(c => c.cell_id))
|
||||
setEditedCells(prev => prev.map(c =>
|
||||
cellIds.has(c.cell_id) ? { ...c, status: 'skipped' as const } : c
|
||||
))
|
||||
const maxIdx = getUniqueRowCount() - 1
|
||||
if (activeIndex < maxIdx) {
|
||||
setActiveIndex(activeIndex + 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Helper: get unique row indices from cells
|
||||
const getUniqueRowCount = () => {
|
||||
if (!editedCells.length) return 0
|
||||
return new Set(editedCells.map(c => c.row_index)).size
|
||||
}
|
||||
|
||||
// Helper: get cells for a given row index (by position in sorted unique rows)
|
||||
const getRowCells = (rowPosition: number) => {
|
||||
const uniqueRows = [...new Set(editedCells.map(c => c.row_index))].sort((a, b) => a - b)
|
||||
const rowIdx = uniqueRows[rowPosition]
|
||||
return editedCells.filter(c => c.row_index === rowIdx)
|
||||
}
|
||||
|
||||
// Focus english input when active entry changes in labeling mode
|
||||
useEffect(() => {
|
||||
if (mode === 'labeling' && enRef.current) {
|
||||
enRef.current.focus()
|
||||
}
|
||||
}, [activeIndex, mode])
|
||||
|
||||
// Keyboard shortcuts in labeling mode
|
||||
useEffect(() => {
|
||||
if (mode !== 'labeling') return
|
||||
const handler = (e: KeyboardEvent) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault()
|
||||
confirmEntry()
|
||||
} else if (e.key === 'ArrowDown' && e.ctrlKey) {
|
||||
e.preventDefault()
|
||||
skipEntry()
|
||||
} else if (e.key === 'ArrowUp' && e.ctrlKey) {
|
||||
e.preventDefault()
|
||||
if (activeIndex > 0) setActiveIndex(activeIndex - 1)
|
||||
}
|
||||
}
|
||||
window.addEventListener('keydown', handler)
|
||||
return () => window.removeEventListener('keydown', handler)
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [mode, activeIndex, editedEntries, editedCells])
|
||||
|
||||
if (!sessionId) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">🔤</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 5: Worterkennung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Bitte zuerst Schritte 1-4 abschliessen.
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
|
||||
const confColor = (conf: number) => {
|
||||
if (conf >= 70) return 'text-green-600 dark:text-green-400'
|
||||
if (conf >= 50) return 'text-yellow-600 dark:text-yellow-400'
|
||||
return 'text-red-600 dark:text-red-400'
|
||||
}
|
||||
|
||||
const statusBadge = (status?: string) => {
|
||||
const map: Record<string, string> = {
|
||||
pending: 'bg-gray-100 dark:bg-gray-700 text-gray-500',
|
||||
confirmed: 'bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-400',
|
||||
edited: 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-400',
|
||||
skipped: 'bg-orange-100 dark:bg-orange-900/30 text-orange-700 dark:text-orange-400',
|
||||
}
|
||||
return map[status || 'pending'] || map.pending
|
||||
}
|
||||
|
||||
const summary = gridResult?.summary
|
||||
const columnsUsed = gridResult?.columns_used || []
|
||||
const gridShape = gridResult?.grid_shape
|
||||
|
||||
// Counts for labeling progress (always cell-based)
|
||||
const confirmedRowIds = new Set(
|
||||
editedCells.filter(c => c.status === 'confirmed' || c.status === 'edited').map(c => c.row_index)
|
||||
)
|
||||
const confirmedCount = confirmedRowIds.size
|
||||
const totalCount = getUniqueRowCount()
|
||||
|
||||
// Group cells by row for generic table display
|
||||
const cellsByRow: Map<number, GridCell[]> = new Map()
|
||||
for (const cell of editedCells) {
|
||||
const existing = cellsByRow.get(cell.row_index) || []
|
||||
existing.push(cell)
|
||||
cellsByRow.set(cell.row_index, existing)
|
||||
}
|
||||
const sortedRowIndices = [...cellsByRow.keys()].sort((a, b) => a - b)
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Loading with streaming progress */}
|
||||
{detecting && (
|
||||
<div className="space-y-1">
|
||||
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||
{streamProgress
|
||||
? `Zelle ${streamProgress.current}/${streamProgress.total} erkannt...`
|
||||
: 'Worterkennung startet...'}
|
||||
</div>
|
||||
{streamProgress && streamProgress.total > 0 && (
|
||||
<div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-1.5">
|
||||
<div
|
||||
className="bg-teal-500 h-1.5 rounded-full transition-all duration-150"
|
||||
style={{ width: `${(streamProgress.current / streamProgress.total) * 100}%` }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Layout badge + Mode toggle */}
|
||||
{gridResult && (
|
||||
<div className="flex items-center gap-2">
|
||||
{/* Layout badge */}
|
||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
|
||||
isVocab
|
||||
? 'bg-indigo-100 dark:bg-indigo-900/30 text-indigo-700 dark:text-indigo-300'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}>
|
||||
{isVocab ? 'Vokabel-Layout' : 'Generisch'}
|
||||
</span>
|
||||
|
||||
{gridShape && (
|
||||
<span className="text-[10px] text-gray-400">
|
||||
{gridShape.rows}×{gridShape.cols} = {gridShape.total_cells} Zellen
|
||||
</span>
|
||||
)}
|
||||
|
||||
<div className="flex-1" />
|
||||
|
||||
<button
|
||||
onClick={() => setMode('overview')}
|
||||
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||
mode === 'overview'
|
||||
? 'bg-teal-600 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||
}`}
|
||||
>
|
||||
Uebersicht
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setMode('labeling')}
|
||||
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||
mode === 'labeling'
|
||||
? 'bg-teal-600 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||
}`}
|
||||
>
|
||||
Labeling ({confirmedCount}/{totalCount})
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Overview mode */}
|
||||
{mode === 'overview' && (
|
||||
<>
|
||||
{/* Images: overlay vs clean */}
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Mit Grid-Overlay
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{gridResult ? (
|
||||
// eslint-disable-next-line @next/next/no-img-element
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Wort-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
) : (
|
||||
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||
{detecting ? 'Erkenne Woerter...' : 'Keine Daten'}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Entzerrtes Bild
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Entzerrt"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Result summary (only after streaming completes) */}
|
||||
{gridResult && summary && !detecting && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Ergebnis: {summary.non_empty_cells}/{summary.total_cells} Zellen mit Text
|
||||
({sortedRowIndices.length} Zeilen, {columnsUsed.length} Spalten)
|
||||
</h4>
|
||||
<span className="text-xs text-gray-400">
|
||||
{gridResult.duration_seconds}s
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Summary badges */}
|
||||
<div className="flex gap-2 flex-wrap">
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
||||
Zellen: {summary.non_empty_cells}/{summary.total_cells}
|
||||
</span>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<span key={i} className={`px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 ${colTypeColor(col.type)}`}>
|
||||
C{col.index}: {colTypeLabel(col.type)}
|
||||
</span>
|
||||
))}
|
||||
{summary.low_confidence > 0 && (
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
||||
Unsicher: {summary.low_confidence}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Entry/Cell table */}
|
||||
<div className="max-h-80 overflow-y-auto">
|
||||
{/* Unified dynamic table — columns driven by columns_used */}
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-12">Zeile</th>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</th>
|
||||
))}
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{sortedRowIndices.map((rowIdx, posIdx) => {
|
||||
const rowCells = cellsByRow.get(rowIdx) || []
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<tr
|
||||
key={rowIdx}
|
||||
className={`border-b dark:border-gray-700/50 ${
|
||||
posIdx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||
}`}
|
||||
onClick={() => { setActiveIndex(posIdx); setMode('labeling') }}
|
||||
>
|
||||
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
|
||||
R{String(rowIdx).padStart(2, '0')}
|
||||
</td>
|
||||
{columnsUsed.map((col) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
return (
|
||||
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
<MultilineText text={cell?.text || ''} />
|
||||
</td>
|
||||
)
|
||||
})}
|
||||
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}%
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
<div ref={tableEndRef} />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Streaming cell table (shown while detecting, before complete) */}
|
||||
{detecting && editedCells.length > 0 && !gridResult?.summary?.non_empty_cells && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Live: {editedCells.length} Zellen erkannt...
|
||||
</h4>
|
||||
<div className="max-h-80 overflow-y-auto">
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-12">Zelle</th>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</th>
|
||||
))}
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{(() => {
|
||||
const liveByRow: Map<number, GridCell[]> = new Map()
|
||||
for (const cell of editedCells) {
|
||||
const existing = liveByRow.get(cell.row_index) || []
|
||||
existing.push(cell)
|
||||
liveByRow.set(cell.row_index, existing)
|
||||
}
|
||||
const liveSorted = [...liveByRow.keys()].sort((a, b) => a - b)
|
||||
return liveSorted.map(rowIdx => {
|
||||
const rowCells = liveByRow.get(rowIdx) || []
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<tr key={rowIdx} className="border-b dark:border-gray-700/50 animate-fade-in">
|
||||
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
|
||||
R{String(rowIdx).padStart(2, '0')}
|
||||
</td>
|
||||
{columnsUsed.map((col) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
return (
|
||||
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300">
|
||||
<MultilineText text={cell?.text || ''} />
|
||||
</td>
|
||||
)
|
||||
})}
|
||||
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}%
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
})
|
||||
})()}
|
||||
</tbody>
|
||||
</table>
|
||||
<div ref={tableEndRef} />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Labeling mode */}
|
||||
{mode === 'labeling' && editedCells.length > 0 && (
|
||||
<div className="grid grid-cols-3 gap-4">
|
||||
{/* Left 2/3: Image with highlighted active row */}
|
||||
<div className="col-span-2">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Zeile {activeIndex + 1} von {getUniqueRowCount()}
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Wort-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
{/* Highlight overlay for active row */}
|
||||
{(() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
return rowCells.map(cell => (
|
||||
<div
|
||||
key={cell.cell_id}
|
||||
className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
|
||||
style={{
|
||||
left: `${cell.bbox_pct.x}%`,
|
||||
top: `${cell.bbox_pct.y}%`,
|
||||
width: `${cell.bbox_pct.w}%`,
|
||||
height: `${cell.bbox_pct.h}%`,
|
||||
}}
|
||||
/>
|
||||
))
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right 1/3: Editable fields */}
|
||||
<div className="space-y-3">
|
||||
{/* Navigation */}
|
||||
<div className="flex items-center justify-between">
|
||||
<button
|
||||
onClick={() => setActiveIndex(Math.max(0, activeIndex - 1))}
|
||||
disabled={activeIndex === 0}
|
||||
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||
>
|
||||
Zurueck
|
||||
</button>
|
||||
<span className="text-xs text-gray-500">
|
||||
{activeIndex + 1} / {getUniqueRowCount()}
|
||||
</span>
|
||||
<button
|
||||
onClick={() => setActiveIndex(Math.min(
|
||||
getUniqueRowCount() - 1,
|
||||
activeIndex + 1
|
||||
))}
|
||||
disabled={activeIndex >= getUniqueRowCount() - 1}
|
||||
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||
>
|
||||
Weiter
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Status badge */}
|
||||
<div className="flex items-center gap-2">
|
||||
{(() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<span className={`text-xs font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}% Konfidenz
|
||||
</span>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
|
||||
{/* Editable fields — one per column, driven by columns_used */}
|
||||
<div className="space-y-2">
|
||||
{(() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
return columnsUsed.map((col, colIdx) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
if (!cell) return null
|
||||
return (
|
||||
<div key={col.index}>
|
||||
<div className="flex items-center gap-1 mb-0.5">
|
||||
<label className={`text-[10px] font-medium ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</label>
|
||||
<span className="text-[9px] text-gray-400">{cell.cell_id}</span>
|
||||
</div>
|
||||
{/* Cell crop */}
|
||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative mb-1">
|
||||
<CellCrop imageUrl={dewarpedUrl} bbox={cell.bbox_pct} />
|
||||
</div>
|
||||
<textarea
|
||||
ref={colIdx === 0 ? enRef as any : undefined}
|
||||
rows={Math.max(1, (cell.text || '').split('\n').length)}
|
||||
value={cell.text || ''}
|
||||
onChange={(e) => updateCell(cell.cell_id, e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
})
|
||||
})()}
|
||||
</div>
|
||||
|
||||
{/* Action buttons */}
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={confirmEntry}
|
||||
className="flex-1 px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700 font-medium"
|
||||
>
|
||||
Bestaetigen (Enter)
|
||||
</button>
|
||||
<button
|
||||
onClick={skipEntry}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
Skip
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Shortcuts hint */}
|
||||
<div className="text-[10px] text-gray-400 space-y-0.5">
|
||||
<div>Enter = Bestaetigen & weiter</div>
|
||||
<div>Ctrl+Down = Ueberspringen</div>
|
||||
<div>Ctrl+Up = Zurueck</div>
|
||||
</div>
|
||||
|
||||
{/* Row list (compact) */}
|
||||
<div className="border-t dark:border-gray-700 pt-2 mt-2">
|
||||
<div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Alle Zeilen
|
||||
</div>
|
||||
<div className="max-h-48 overflow-y-auto space-y-0.5">
|
||||
{sortedRowIndices.map((rowIdx, posIdx) => {
|
||||
const rowCells = cellsByRow.get(rowIdx) || []
|
||||
const textParts = rowCells.filter(c => c.text).map(c => c.text.replace(/\n/g, ' '))
|
||||
return (
|
||||
<div
|
||||
key={rowIdx}
|
||||
onClick={() => setActiveIndex(posIdx)}
|
||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
||||
posIdx === activeIndex
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
<span className="w-6 text-right text-gray-400 font-mono">R{String(rowIdx).padStart(2, '0')}</span>
|
||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||
{textParts.join(' \u2192 ') || '\u2014'}
|
||||
</span>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Controls */}
|
||||
{gridResult && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center gap-3 flex-wrap">
|
||||
{/* Grid method selector */}
|
||||
<select
|
||||
value={gridMethod}
|
||||
onChange={(e) => setGridMethod(e.target.value as 'v2' | 'words_first')}
|
||||
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
<option value="v2">Standard (v2)</option>
|
||||
<option value="words_first">Words-First</option>
|
||||
</select>
|
||||
|
||||
{/* OCR Engine selector */}
|
||||
<select
|
||||
value={ocrEngine}
|
||||
onChange={(e) => setOcrEngine(e.target.value as 'auto' | 'tesseract' | 'rapid' | 'paddle')}
|
||||
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
<option value="auto">Auto (RapidOCR wenn verfuegbar)</option>
|
||||
<option value="rapid">RapidOCR (ONNX)</option>
|
||||
<option value="tesseract">Tesseract</option>
|
||||
<option value="paddle">PP-OCRv5 (lokal)</option>
|
||||
</select>
|
||||
|
||||
{/* Pronunciation selector (only for vocab) */}
|
||||
{isVocab && (
|
||||
<select
|
||||
value={pronunciation}
|
||||
onChange={(e) => setPronunciation(e.target.value as 'british' | 'american')}
|
||||
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
<option value="british">Britisch (RP)</option>
|
||||
<option value="american">Amerikanisch</option>
|
||||
</select>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={() => runAutoDetection()}
|
||||
disabled={detecting}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-50"
|
||||
>
|
||||
Erneut erkennen
|
||||
</button>
|
||||
|
||||
{/* Show which engine was used */}
|
||||
{usedEngine && (
|
||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
|
||||
usedEngine === 'rapid' || usedEngine === 'paddle'
|
||||
? 'bg-purple-100 dark:bg-purple-900/30 text-purple-700 dark:text-purple-300'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}>
|
||||
{usedEngine === 'paddle' ? 'pp-ocrv5' : usedEngine}
|
||||
</span>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={() => goToStep(3)}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 text-orange-600 dark:text-orange-400 border-orange-300 dark:border-orange-700"
|
||||
>
|
||||
Zeilen korrigieren (Step 4)
|
||||
</button>
|
||||
|
||||
<div className="flex-1" />
|
||||
|
||||
{/* Ground truth */}
|
||||
{!gtSaved ? (
|
||||
<>
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Notizen (optional)"
|
||||
value={gtNotes}
|
||||
onChange={(e) => setGtNotes(e.target.value)}
|
||||
className="px-2 py-1 text-xs border rounded dark:bg-gray-700 dark:border-gray-600 w-48"
|
||||
/>
|
||||
<button
|
||||
onClick={() => handleGroundTruth(true)}
|
||||
className="px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700"
|
||||
>
|
||||
Korrekt
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleGroundTruth(false)}
|
||||
className="px-3 py-1.5 text-xs bg-red-600 text-white rounded-lg hover:bg-red-700"
|
||||
>
|
||||
Fehlerhaft
|
||||
</button>
|
||||
</>
|
||||
) : (
|
||||
<span className="text-xs text-green-600 dark:text-green-400">
|
||||
Ground Truth gespeichert
|
||||
</span>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium"
|
||||
>
|
||||
Weiter
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* CellCrop: Shows a cropped portion of the dewarped image based on percent bbox.
|
||||
* Uses CSS background-image + background-position for efficient cropping.
|
||||
*/
|
||||
function CellCrop({ imageUrl, bbox }: { imageUrl: string; bbox: { x: number; y: number; w: number; h: number } }) {
  // Upper bound for the magnification so tiny cells do not blow up the image.
  const MAX_ZOOM = 8
  const { x, y, w, h } = bbox

  // Zoom so the cell fills the container along its tighter axis (bbox values are percentages).
  const zoom = Math.min(100 / w, 100 / h, MAX_ZOOM)

  // NOTE(review): CSS resolves background-position percentages against
  // (container size - image size), not against the image itself, so these
  // negative percentages may not land exactly on the cell — confirm visually.
  const offsetX = -x * zoom
  const offsetY = -y * zoom

  return (
    <div
      className="w-full h-full"
      style={{
        backgroundImage: `url(${imageUrl})`,
        backgroundSize: `${zoom * 100}%`,
        backgroundPosition: `${offsetX}% ${offsetY}%`,
        backgroundRepeat: 'no-repeat',
      }}
    />
  )
}
|
||||
@@ -0,0 +1,176 @@
|
||||
/**
|
||||
* Tests for useSlideWordPositions hook.
|
||||
*
|
||||
* The hook computes word positions from OCR word_boxes or pixel projection.
|
||||
* Since Canvas/Image are not available in jsdom, we test the pure computation
|
||||
* logic by extracting and verifying the WordPosition interface contract.
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WordPosition interface (mirrored from useSlideWordPositions.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Shape of one positioned word as produced by the helper functions in this file.
interface WordPosition {
|
||||
// Left edge of the word; wordBoxToPosition computes it as left / imgW * 100.
xPct: number
|
||||
// Width of the word; wordBoxToPosition computes it as width / imgW * 100.
wPct: number
|
||||
// Top edge of the word; wordBoxToPosition computes it as top / imgH * 100.
yPct: number
|
||||
// Height of the word; wordBoxToPosition computes it as height / imgH * 100.
hPct: number
|
||||
// The word (or token) text.
text: string
|
||||
// Font scaling factor; both helpers in this file always emit 1.0.
fontRatio: number
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pure computation functions extracted from the hook for testing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Word-box path: compute WordPosition from an OCR word_box.
|
||||
* Replicates the word_boxes.map() logic in useSlideWordPositions.
|
||||
*/
|
||||
function wordBoxToPosition(
  box: { text: string; left: number; top: number; width: number; height: number },
  imgW: number,
  imgH: number,
): WordPosition {
  const { text, left, top, width, height } = box

  // Horizontal values are relative to the image width, vertical ones to its height.
  const xPct = (left / imgW) * 100
  const wPct = (width / imgW) * 100
  const yPct = (top / imgH) * 100
  const hPct = (height / imgH) * 100

  // The word-box path always reports a neutral font ratio.
  return { xPct, wPct, yPct, hPct, text, fontRatio: 1.0 }
}
|
||||
|
||||
/**
|
||||
* Fallback path (no word_boxes): spread tokens evenly across cell bbox.
|
||||
* Replicates the fallback logic in useSlideWordPositions.
|
||||
*/
|
||||
function fallbackPositions(
  tokens: string[],
  bboxPct: { x: number; y: number; w: number; h: number },
): WordPosition[] {
  // Every token gets an equally wide slot inside the cell.
  const slotWidth = bboxPct.w / tokens.length
  const result: WordPosition[] = []

  for (let i = 0; i < tokens.length; i++) {
    result.push({
      xPct: bboxPct.x + i * slotWidth,
      wPct: slotWidth,
      // No per-word vertical data: reuse the cell's own Y and height.
      yPct: bboxPct.y,
      hPct: bboxPct.h,
      text: tokens[i],
      fontRatio: 1.0,
    })
  }

  return result
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('wordBoxToPosition (word-box path)', () => {
  it('should compute percentage positions from pixel coordinates', () => {
    // Pixel box on a 1000 x 2000 image.
    const result = wordBoxToPosition(
      { text: 'hello', left: 100, top: 200, width: 80, height: 20 },
      1000,
      2000,
    )

    expect(result.xPct).toBeCloseTo(10, 1) // 100 / 1000 * 100
    expect(result.wPct).toBeCloseTo(8, 1) // 80 / 1000 * 100
    expect(result.yPct).toBeCloseTo(10, 1) // 200 / 2000 * 100
    expect(result.hPct).toBeCloseTo(1, 1) // 20 / 2000 * 100
    expect(result.text).toBe('hello')
    expect(result.fontRatio).toBe(1.0)
  })

  it('should produce different yPct for words on different lines', () => {
    const width = 1000
    const height = 2000
    const upper = wordBoxToPosition({ text: 'line1', left: 50, top: 100, width: 60, height: 20 }, width, height)
    const lower = wordBoxToPosition({ text: 'line2', left: 50, top: 130, width: 60, height: 20 }, width, height)

    // The lower word must sit strictly below the upper one.
    expect(upper.yPct).not.toEqual(lower.yPct)
    expect(lower.yPct).toBeGreaterThan(upper.yPct)
  })

  it('should handle word at origin', () => {
    const atOrigin = wordBoxToPosition({ text: 'a', left: 0, top: 0, width: 50, height: 25 }, 500, 500)

    expect(atOrigin.xPct).toBe(0)
    expect(atOrigin.yPct).toBe(0)
    expect(atOrigin.wPct).toBeCloseTo(10, 1)
    expect(atOrigin.hPct).toBeCloseTo(5, 1)
  })

  it('should handle word at bottom-right corner', () => {
    // Box touches both the right and the bottom image edge.
    const atCorner = wordBoxToPosition({ text: 'z', left: 900, top: 1900, width: 100, height: 100 }, 1000, 2000)

    expect(atCorner.xPct).toBe(90)
    expect(atCorner.yPct).toBe(95)
    expect(atCorner.wPct).toBe(10)
    expect(atCorner.hPct).toBe(5)
  })
})
|
||||
|
||||
|
||||
describe('fallbackPositions (no word_boxes)', () => {
  it('should spread tokens evenly across cell width', () => {
    const cell = { x: 10, y: 20, w: 60, h: 5 }
    const [first, second, ...rest] = fallbackPositions(['apple', 'Apfel'], cell)

    // Exactly two entries, each taking half of the 60% wide cell.
    expect(rest.length + 2).toBe(2)
    expect(first.xPct).toBeCloseTo(10, 1)
    expect(second.xPct).toBeCloseTo(40, 1) // 10 + 30
    expect(first.wPct).toBeCloseTo(30, 1)
    expect(second.wPct).toBeCloseTo(30, 1)
  })

  it('should use cell bbox for Y position (all words same Y)', () => {
    const cell = { x: 5, y: 30, w: 80, h: 4 }

    fallbackPositions(['a', 'b', 'c'], cell).forEach((entry) => {
      expect(entry.yPct).toBe(30)
      expect(entry.hPct).toBe(4)
    })
  })

  it('should handle single token', () => {
    const cell = { x: 15, y: 25, w: 50, h: 6 }
    const result = fallbackPositions(['word'], cell)

    // A lone token occupies the whole cell.
    expect(result.length).toBe(1)
    const only = result[0]
    expect(only.xPct).toBe(15)
    expect(only.wPct).toBe(50)
    expect(only.yPct).toBe(25)
    expect(only.hPct).toBe(6)
  })
})
|
||||
|
||||
|
||||
describe('WordPosition yPct/hPct contract', () => {
  it('word-box path: yPct comes from box.top, not cell bbox', () => {
    // Key expectation: words in a multi-line cell keep their own vertical position.
    const cellBbox = { x: 10, y: 20, w: 60, h: 10 } // cell covers y = 20% .. 30%
    const side = 1000

    // Two words of the same cell, placed on separate lines.
    const upper = wordBoxToPosition({ text: 'line1', left: 100, top: 200, width: 80, height: 20 }, side, side)
    const lower = wordBoxToPosition({ text: 'line2', left: 100, top: 260, width: 80, height: 20 }, side, side)

    // Expect 20% and 26% respectively, not both pinned to cellBbox.y.
    expect(upper.yPct).toBeCloseTo(20, 1)
    expect(lower.yPct).toBeCloseTo(26, 1)
    expect(upper.yPct).not.toEqual(lower.yPct)

    // Heights come from the individual boxes (2%), not from the 10% tall cell.
    expect(upper.hPct).toBeCloseTo(2, 1)
    expect(lower.hPct).toBeCloseTo(2, 1)
    expect(upper.hPct).toBeLessThan(cellBbox.h)
  })

  it('fallback path: yPct equals cell bbox.y (no per-word data)', () => {
    const cell = { x: 10, y: 45, w: 30, h: 8 }
    const [left, right] = fallbackPositions(['a', 'b'], cell)

    // Without word boxes every word inherits the cell's Y and height.
    expect(left.yPct).toBe(cell.y)
    expect(right.yPct).toBe(cell.y)
    expect(left.hPct).toBe(cell.h)
    expect(right.hPct).toBe(cell.h)
  })
})
|
||||
198
admin-lehrer/components/ocr-pipeline/usePixelWordPositions.ts
Normal file
198
admin-lehrer/components/ocr-pipeline/usePixelWordPositions.ts
Normal file
@@ -0,0 +1,198 @@
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Horizontal placement and font scaling of one word group inside a grid cell. */
export interface WordPosition {
|
||||
/** Left edge of the word group, in the same percent units as the cell's bbox_pct.x. */
xPct: number
|
||||
/** Width of the word group, in the same percent units as the cell's bbox_pct.w. */
wPct: number
|
||||
/** Text of the word group (trimmed). */
text: string
|
||||
/** Estimated font size divided by cell height, capped at 1.0; the hook later normalises it to the most common value. */
fontRatio: number
|
||||
}
|
||||
|
||||
/**
 * Shared hook: analyse dark-pixel clusters on an image to determine
 * the exact horizontal position & auto-font-size of word groups in each cell.
 *
 * When rotation=180, the image is rotated 180° before pixel analysis.
 * Cell coordinates are transformed to the rotated space for reading,
 * and cluster positions are mirrored back to the original coordinate system.
 *
 * The analysis runs asynchronously once the image has loaded. If the inputs
 * change or the component unmounts before that happens, the pending result
 * is discarded so a slow, outdated image can never overwrite newer state.
 *
 * @param imageUrl - URL of the image to analyse (loaded with crossOrigin 'anonymous').
 * @param cells - Grid cells; cells without `bbox_pct` or `text` are ignored.
 * @param active - When false the effect does nothing and the last result is kept.
 * @param rotation - 0 (default) or 180 degrees applied before pixel analysis.
 * @returns A Map<cell_id, WordPosition[]>; empty until the first analysis completes.
 */
export function usePixelWordPositions(
  imageUrl: string,
  cells: GridCell[],
  active: boolean,
  rotation: 0 | 180 = 0,
): Map<string, WordPosition[]> {
  const [cellWordPositions, setCellWordPositions] = useState<Map<string, WordPosition[]>>(new Map())

  useEffect(() => {
    if (!active || cells.length === 0 || !imageUrl) return

    // Set by the cleanup below; prevents a stale load from touching state.
    let cancelled = false

    const img = new Image()
    img.crossOrigin = 'anonymous'
    img.onload = () => {
      if (cancelled) return

      const imgW = img.naturalWidth
      const imgH = img.naturalHeight

      const canvas = document.createElement('canvas')
      canvas.width = imgW
      canvas.height = imgH
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      if (rotation === 180) {
        // Draw image rotated 180°
        ctx.translate(imgW, imgH)
        ctx.rotate(Math.PI)
        ctx.drawImage(img, 0, 0)
        ctx.setTransform(1, 0, 0, 1, 0, 0) // back to the identity transform
      } else {
        ctx.drawImage(img, 0, 0)
      }

      // Reference font used only for measuring text widths.
      const refFontSize = 40
      const fontFam = "'Liberation Sans', Arial, sans-serif"
      ctx.font = `${refFontSize}px ${fontFam}`

      const positions = new Map<string, WordPosition[]>()

      for (const cell of cells) {
        if (!cell.bbox_pct || !cell.text) continue

        // Split by 3+ whitespace into word-groups
        const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)

        // Cell pixel region — when rotated 180°, transform coordinates
        let cx: number, cy: number
        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)

        if (rotation === 180) {
          // In rotated image: (x,y) maps to (W-x-w, H-y-h)
          cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW)
          cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH)
        } else {
          cx = Math.round(cell.bbox_pct.x / 100 * imgW)
          cy = Math.round(cell.bbox_pct.y / 100 * imgH)
        }
        if (cw <= 0 || ch <= 0) continue
        // Pull negative origins back to 0; cells that still overflow the image are skipped
        if (cx < 0) cx = 0
        if (cy < 0) cy = 0
        if (cx + cw > imgW || cy + ch > imgH) continue

        const imageData = ctx.getImageData(cx, cy, cw, ch)

        // Vertical projection: count dark pixels per column
        const proj = new Float32Array(cw)
        for (let y = 0; y < ch; y++) {
          for (let x = 0; x < cw; x++) {
            const idx = (y * cw + x) * 4
            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
            if (lum < 128) proj[x]++
          }
        }

        // Find dark-pixel clusters (word groups on the image)
        const threshold = Math.max(1, ch * 0.03)
        const minGap = Math.max(5, Math.round(cw * 0.02))
        let clusters: { start: number; end: number }[] = []
        let inCluster = false
        let clStart = 0
        let gap = 0

        for (let x = 0; x < cw; x++) {
          if (proj[x] >= threshold) {
            if (!inCluster) { clStart = x; inCluster = true }
            gap = 0
          } else if (inCluster) {
            gap++
            if (gap > minGap) {
              // Cluster ends at the last dark column before the gap started
              clusters.push({ start: clStart, end: x - gap })
              inCluster = false
              gap = 0
            }
          }
        }
        if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })

        if (clusters.length === 0) continue

        // When rotated 180°, mirror clusters back to original coordinate system
        // A cluster at (start, end) in rotated space = (cw-1-end, cw-1-start) in original
        if (rotation === 180) {
          clusters = clusters.map(c => ({
            start: cw - 1 - c.end,
            end: cw - 1 - c.start,
          })).reverse() // reverse to restore left-to-right order in original space
        }

        const wordPos: WordPosition[] = []

        if (groups.length <= 1) {
          // Single group: position at first cluster, merge all clusters for width
          const firstCl = clusters[0]
          const lastCl = clusters[clusters.length - 1]
          const clusterW = lastCl.end - firstCl.start + 1
          const measured = ctx.measureText(cell.text.trim())
          // Font size at which the text would be as wide as the dark-pixel span
          const autoFontPx = refFontSize * (clusterW / measured.width)
          const fontRatio = Math.min(autoFontPx / ch, 1.0)
          wordPos.push({
            xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w,
            wPct: ((lastCl.end - firstCl.start + 1) / cw) * cell.bbox_pct.w,
            text: cell.text.trim(),
            fontRatio,
          })
        } else if (clusters.length >= groups.length) {
          // Multiple groups: match to clusters left-to-right
          for (let i = 0; i < groups.length; i++) {
            const cl = clusters[i]
            const clusterW = cl.end - cl.start + 1
            const measured = ctx.measureText(groups[i])
            const autoFontPx = refFontSize * (clusterW / measured.width)
            const fontRatio = Math.min(autoFontPx / ch, 1.0)
            wordPos.push({
              xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
              wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
              text: groups[i],
              fontRatio,
            })
          }
        } else {
          continue // fewer clusters than groups — skip
        }

        positions.set(cell.cell_id, wordPos)
      }

      // Normalise: find the most common fontRatio (mode) and apply it to all
      const allRatios: number[] = []
      for (const wps of positions.values()) {
        for (const wp of wps) allRatios.push(wp.fontRatio)
      }
      if (allRatios.length > 0) {
        // Bucket ratios in steps of 0.02, find mode
        const buckets = new Map<number, number>()
        for (const r of allRatios) {
          const key = Math.round(r * 50) / 50 // round to nearest 0.02
          buckets.set(key, (buckets.get(key) || 0) + 1)
        }
        let modeRatio = allRatios[0]
        let modeCount = 0
        for (const [ratio, count] of buckets) {
          if (count > modeCount) { modeRatio = ratio; modeCount = count }
        }
        // Apply mode to all word positions
        for (const wps of positions.values()) {
          for (const wp of wps) wp.fontRatio = modeRatio
        }
      }

      setCellWordPositions(positions)
    }
    img.src = imageUrl

    // Cleanup: runs when inputs change or the component unmounts.
    return () => {
      cancelled = true
      img.onload = null
    }
  }, [active, cells, imageUrl, rotation])

  return cellWordPositions
}
|
||||
@@ -234,28 +234,6 @@ export const MODULE_REGISTRY: BackendModule[] = [
|
||||
},
|
||||
priority: 'high'
|
||||
},
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
description: 'Vergleich verschiedener KI-Modelle und Provider',
|
||||
category: 'ai',
|
||||
backend: {
|
||||
service: 'python-backend',
|
||||
port: 8000,
|
||||
basePath: '/api/llm',
|
||||
endpoints: [
|
||||
{ path: '/providers', method: 'GET', description: 'Verfuegbare Provider' },
|
||||
{ path: '/compare', method: 'POST', description: 'Modelle vergleichen' },
|
||||
{ path: '/benchmark', method: 'POST', description: 'Benchmark ausfuehren' },
|
||||
]
|
||||
},
|
||||
frontend: {
|
||||
adminV2Page: '/ai/llm-compare',
|
||||
oldAdminPage: '/admin/llm-compare',
|
||||
status: 'connected'
|
||||
},
|
||||
priority: 'medium'
|
||||
},
|
||||
{
|
||||
id: 'magic-help',
|
||||
name: 'Magic Help (TrOCR)',
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* All DSGVO and Compliance modules are now consolidated under the SDK.
|
||||
*/
|
||||
|
||||
export type CategoryId = 'compliance-sdk' | 'ai' | 'education' | 'website' | 'sdk-docs'
|
||||
export type CategoryId = 'communication' | 'ai' | 'education' | 'website' | 'sdk-docs'
|
||||
|
||||
export interface NavModule {
|
||||
id: string
|
||||
@@ -31,23 +31,47 @@ export interface NavCategory {
|
||||
|
||||
export const navigation: NavCategory[] = [
|
||||
// =========================================================================
|
||||
// Compliance SDK - Alle Datenschutz-, Compliance- und SDK-Module
|
||||
// Kommunikation — Video, Voice, Alerts
|
||||
// =========================================================================
|
||||
{
|
||||
id: 'compliance-sdk',
|
||||
name: 'Compliance SDK',
|
||||
icon: 'shield',
|
||||
color: '#8b5cf6', // Violet-500
|
||||
colorClass: 'compliance-sdk',
|
||||
description: 'DSGVO, Audit, GRC & SDK-Werkzeuge',
|
||||
id: 'communication',
|
||||
name: 'Kommunikation',
|
||||
icon: 'mail',
|
||||
color: '#f59e0b', // Amber-500
|
||||
colorClass: 'communication',
|
||||
description: 'Video & Chat, Voice Service, E-Mail, Alerts',
|
||||
modules: [
|
||||
{
|
||||
id: 'catalog-manager',
|
||||
name: 'Katalogverwaltung',
|
||||
href: '/dashboard/catalog-manager',
|
||||
description: 'SDK-Kataloge & Auswahltabellen',
|
||||
purpose: 'Zentrale Verwaltung aller Dropdown- und Auswahltabellen im SDK. Systemkataloge (Risiken, Massnahmen, Vorlagen) anzeigen und benutzerdefinierte Eintraege ergaenzen, bearbeiten und loeschen.',
|
||||
audience: ['DSB', 'Compliance Officer', 'Administratoren'],
|
||||
id: 'mail',
|
||||
name: 'Unified Inbox',
|
||||
href: '/communication/mail',
|
||||
description: 'E-Mail-Konten & KI-Analyse',
|
||||
purpose: 'E-Mail-Konten verwalten und KI-Kategorisierung nutzen. IMAP/SMTP Konfiguration, Vorlagen und Audit-Log.',
|
||||
audience: ['Support', 'Admins'],
|
||||
},
|
||||
{
|
||||
id: 'video-chat',
|
||||
name: 'Video & Chat',
|
||||
href: '/communication/video-chat',
|
||||
description: 'Matrix & Jitsi Monitoring',
|
||||
purpose: 'Dashboard fuer Matrix Synapse und Jitsi Meet. Service-Status, aktive Meetings, Traffic-Analyse und Ressourcen-Empfehlungen.',
|
||||
audience: ['Admins', 'DevOps'],
|
||||
},
|
||||
{
|
||||
id: 'voice-service',
|
||||
name: 'Voice Service',
|
||||
href: '/communication/matrix',
|
||||
description: 'PersonaPlex-7B & TaskOrchestrator',
|
||||
purpose: 'Voice-First Interface Konfiguration und Architektur-Dokumentation. Live Demo, Task States, Intents und DSGVO-Informationen.',
|
||||
audience: ['Entwickler', 'Admins'],
|
||||
},
|
||||
{
|
||||
id: 'alerts',
|
||||
name: 'Alerts Monitoring',
|
||||
href: '/communication/alerts',
|
||||
description: 'Google Alerts & Feed-Ueberwachung',
|
||||
purpose: 'Google Alerts und RSS-Feeds fuer relevante Neuigkeiten ueberwachen. Topics, Regeln, Relevanz-Profil und Digest-Generierung.',
|
||||
audience: ['Marketing', 'Admins'],
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -108,16 +132,6 @@ export const navigation: NavCategory[] = [
|
||||
// -----------------------------------------------------------------------
|
||||
// KI-Werkzeuge: Standalone-Tools fuer Entwicklung & QA
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
href: '/ai/llm-compare',
|
||||
description: 'KI-Provider Vergleich',
|
||||
purpose: 'Vergleichen Sie verschiedene LLM-Anbieter (Ollama, OpenAI, Anthropic) hinsichtlich Qualitaet, Geschwindigkeit und Kosten. Standalone-Werkzeug fuer Modell-Evaluation.',
|
||||
audience: ['Entwickler', 'Data Scientists'],
|
||||
oldAdminPath: '/admin/llm-compare',
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-compare',
|
||||
name: 'OCR Vergleich',
|
||||
@@ -127,6 +141,24 @@ export const navigation: NavCategory[] = [
|
||||
audience: ['Entwickler', 'Data Scientists', 'Lehrer'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-pipeline',
|
||||
name: 'OCR Pipeline',
|
||||
href: '/ai/ocr-pipeline',
|
||||
description: 'Schrittweise Seitenrekonstruktion',
|
||||
purpose: 'Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. 6-Schritt-Pipeline mit Ground Truth Validierung.',
|
||||
audience: ['Entwickler', 'Data Scientists'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-overlay',
|
||||
name: 'OCR Overlay',
|
||||
href: '/ai/ocr-overlay',
|
||||
description: 'Ganzseitige Overlay-Rekonstruktion',
|
||||
purpose: 'Arbeitsblatt ohne Spaltenerkennung direkt als Overlay rekonstruieren. Vereinfachte 7-Schritt-Pipeline.',
|
||||
audience: ['Entwickler'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
|
||||
@@ -23,7 +23,7 @@ export const roles: Role[] = [
|
||||
name: 'Entwickler',
|
||||
description: 'Voller Zugriff auf alle Bereiche',
|
||||
icon: 'code',
|
||||
visibleCategories: ['compliance-sdk', 'ai', 'education', 'website'],
|
||||
visibleCategories: ['communication', 'ai', 'education', 'website'],
|
||||
color: 'bg-primary-100 border-primary-300 text-primary-700',
|
||||
},
|
||||
{
|
||||
@@ -31,7 +31,7 @@ export const roles: Role[] = [
|
||||
name: 'Manager',
|
||||
description: 'Executive Uebersicht',
|
||||
icon: 'chart',
|
||||
visibleCategories: ['compliance-sdk', 'website'],
|
||||
visibleCategories: ['communication', 'website'],
|
||||
color: 'bg-blue-100 border-blue-300 text-blue-700',
|
||||
},
|
||||
{
|
||||
@@ -39,7 +39,7 @@ export const roles: Role[] = [
|
||||
name: 'Auditor',
|
||||
description: 'Compliance Pruefung',
|
||||
icon: 'clipboard',
|
||||
visibleCategories: ['compliance-sdk'],
|
||||
visibleCategories: ['communication'],
|
||||
color: 'bg-amber-100 border-amber-300 text-amber-700',
|
||||
},
|
||||
{
|
||||
@@ -47,7 +47,7 @@ export const roles: Role[] = [
|
||||
name: 'DSB',
|
||||
description: 'Datenschutzbeauftragter',
|
||||
icon: 'shield',
|
||||
visibleCategories: ['compliance-sdk'],
|
||||
visibleCategories: ['communication'],
|
||||
color: 'bg-purple-100 border-purple-300 text-purple-700',
|
||||
},
|
||||
]
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
const nextConfig = {
|
||||
output: 'standalone',
|
||||
reactStrictMode: true,
|
||||
// Force unique build ID to bust browser caches on each deploy
|
||||
generateBuildId: () => `build-${Date.now()}`,
|
||||
// TODO: Remove after fixing type incompatibilities from restore
|
||||
typescript: {
|
||||
ignoreBuildErrors: true,
|
||||
|
||||
463
admin-lehrer/package-lock.json
generated
463
admin-lehrer/package-lock.json
generated
@@ -8,6 +8,7 @@
|
||||
"name": "breakpilot-admin-v2",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"bpmn-js": "^18.0.1",
|
||||
"jspdf": "^4.1.0",
|
||||
"jszip": "^3.10.1",
|
||||
"lucide-react": "^0.468.0",
|
||||
@@ -15,6 +16,7 @@
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"reactflow": "^11.11.4",
|
||||
"recharts": "^2.15.0",
|
||||
"uuid": "^13.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -428,6 +430,16 @@
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@bpmn-io/diagram-js-ui": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@bpmn-io/diagram-js-ui/-/diagram-js-ui-0.2.3.tgz",
|
||||
"integrity": "sha512-OGyjZKvGK8tHSZ0l7RfeKhilGoOGtFDcoqSGYkX0uhFlo99OVZ9Jn1K7TJGzcE9BdKwvA5Y5kGqHEhdTxHvFfw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"htm": "^3.1.1",
|
||||
"preact": "^10.11.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@csstools/color-helpers": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.1.0.tgz",
|
||||
@@ -2996,6 +3008,39 @@
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/bpmn-js": {
|
||||
"version": "18.12.0",
|
||||
"resolved": "https://registry.npmjs.org/bpmn-js/-/bpmn-js-18.12.0.tgz",
|
||||
"integrity": "sha512-Dg2O+r7jpBwLgWGpManc7P4ZfZQfxTVi2xNtXR3Q2G5Hx1RVYVFoNsQED8+FPCgjy6m7ZQbxKP1sjCJt5rbtBg==",
|
||||
"license": "SEE LICENSE IN LICENSE",
|
||||
"dependencies": {
|
||||
"bpmn-moddle": "^10.0.0",
|
||||
"diagram-js": "^15.9.0",
|
||||
"diagram-js-direct-editing": "^3.3.0",
|
||||
"ids": "^3.0.0",
|
||||
"inherits-browser": "^0.1.0",
|
||||
"min-dash": "^5.0.0",
|
||||
"min-dom": "^5.2.0",
|
||||
"tiny-svg": "^4.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/bpmn-moddle": {
|
||||
"version": "10.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bpmn-moddle/-/bpmn-moddle-10.0.0.tgz",
|
||||
"integrity": "sha512-vXePD5jkatcILmM3zwJG/m6IIHIghTGB7WvgcdEraEw8E8VdJHrTgrvBUhbzqaXJpnsGQz15QS936xeBY6l9aA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-dash": "^5.0.0",
|
||||
"moddle": "^8.0.0",
|
||||
"moddle-xml": "^12.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 20.12"
|
||||
}
|
||||
},
|
||||
"node_modules/braces": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
@@ -3153,6 +3198,15 @@
|
||||
"integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/clsx": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz",
|
||||
"integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/commander": {
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz",
|
||||
@@ -3262,9 +3316,20 @@
|
||||
"version": "3.2.3",
|
||||
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
|
||||
"integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==",
|
||||
"devOptional": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/d3-array": {
|
||||
"version": "3.2.4",
|
||||
"resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz",
|
||||
"integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"internmap": "1 - 2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-color": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz",
|
||||
@@ -3305,6 +3370,15 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-format": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz",
|
||||
"integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-interpolate": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz",
|
||||
@@ -3317,6 +3391,31 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-path": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz",
|
||||
"integrity": "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-scale": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz",
|
||||
"integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"d3-array": "2.10.0 - 3",
|
||||
"d3-format": "1 - 3",
|
||||
"d3-interpolate": "1.2.0 - 3",
|
||||
"d3-time": "2.1.1 - 3",
|
||||
"d3-time-format": "2 - 4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-selection": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
|
||||
@@ -3326,6 +3425,42 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-shape": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz",
|
||||
"integrity": "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"d3-path": "^3.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-time": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz",
|
||||
"integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"d3-array": "2 - 3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-time-format": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz",
|
||||
"integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"d3-time": "1 - 3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-timer": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz",
|
||||
@@ -3409,6 +3544,12 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/decimal.js-light": {
|
||||
"version": "2.5.1",
|
||||
"resolved": "https://registry.npmjs.org/decimal.js-light/-/decimal.js-light-2.5.1.tgz",
|
||||
"integrity": "sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/dequal": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
|
||||
@@ -3429,6 +3570,51 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/diagram-js": {
|
||||
"version": "15.9.1",
|
||||
"resolved": "https://registry.npmjs.org/diagram-js/-/diagram-js-15.9.1.tgz",
|
||||
"integrity": "sha512-2JsGmyeTo6o39beq2e/UkTfMopQSM27eXBUzbYQ+1m5VhEnQDkcjcrnRCjcObLMzzXSE/LSJyYhji90sqBFodQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@bpmn-io/diagram-js-ui": "^0.2.3",
|
||||
"clsx": "^2.1.1",
|
||||
"didi": "^11.0.0",
|
||||
"inherits-browser": "^0.1.0",
|
||||
"min-dash": "^5.0.0",
|
||||
"min-dom": "^5.2.0",
|
||||
"object-refs": "^0.4.0",
|
||||
"path-intersection": "^4.1.0",
|
||||
"tiny-svg": "^4.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/diagram-js-direct-editing": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmjs.org/diagram-js-direct-editing/-/diagram-js-direct-editing-3.3.0.tgz",
|
||||
"integrity": "sha512-EjXYb35J3qBU8lLz5U81hn7wNykVmF7U5DXZ7BvPok2IX7rmPz+ZyaI5AEMiqaC6lpSnHqPxFcPgKEiJcAiv5w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-dash": "^5.0.0",
|
||||
"min-dom": "^5.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"diagram-js": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/didi": {
|
||||
"version": "11.0.0",
|
||||
"resolved": "https://registry.npmjs.org/didi/-/didi-11.0.0.tgz",
|
||||
"integrity": "sha512-PzCfRzQttvFpVcYMbSF7h8EsWjeJpVjWH4qDhB5LkMi1ILvHq4Ob0vhM2wLFziPkbUBi+PAo7ODbe2sacR7nJQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 20.12"
|
||||
}
|
||||
},
|
||||
"node_modules/didyoumean": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
|
||||
@@ -3451,6 +3637,28 @@
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/dom-helpers": {
|
||||
"version": "5.2.1",
|
||||
"resolved": "https://registry.npmjs.org/dom-helpers/-/dom-helpers-5.2.1.tgz",
|
||||
"integrity": "sha512-nRCa7CK3VTrM2NmGkIy4cbK7IZlgBE/PYMn55rrXefr5xXDP0LdtfPnblFDoVdcAfslJ7or6iqAUnx0CCGIWQA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.8.7",
|
||||
"csstype": "^3.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/domify": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/domify/-/domify-3.0.0.tgz",
|
||||
"integrity": "sha512-bs2yO68JDFOm6rKv8f0EnrM2cENduhRkpqOtt/s5l5JBA/eqGBZCzLPmdYoHtJ6utgLGgcBajFsEQbl12pT0lQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/dompurify": {
|
||||
"version": "3.3.1",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.1.tgz",
|
||||
@@ -3550,6 +3758,12 @@
|
||||
"@types/estree": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eventemitter3": {
|
||||
"version": "4.0.7",
|
||||
"resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz",
|
||||
"integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/expect-type": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz",
|
||||
@@ -3560,6 +3774,15 @@
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fast-equals": {
|
||||
"version": "5.4.0",
|
||||
"resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-5.4.0.tgz",
|
||||
"integrity": "sha512-jt2DW/aNFNwke7AUd+Z+e6pz39KO5rzdbbFCg2sGafS4mk13MI7Z8O5z9cADNn5lhGODIgLwug6TZO2ctf7kcw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fast-glob": {
|
||||
"version": "3.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz",
|
||||
@@ -3705,6 +3928,12 @@
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/htm": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/htm/-/htm-3.1.1.tgz",
|
||||
"integrity": "sha512-983Vyg8NwUE7JkZ6NmOqpCZ+sh1bKv2iYTlUkzlWmA5JD2acKoxd4KVxbMmxX/85mtfdnDmTFoNKcg5DGAvxNQ==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/html-encoding-sniffer": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz",
|
||||
@@ -3760,6 +3989,15 @@
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/ids": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ids/-/ids-3.0.1.tgz",
|
||||
"integrity": "sha512-mr0zAgpgA/hzCrHB0DnoTG6xZjNC3ABs4eaksXrpVtfaDatA2SVdDb1ZPLjmKjqzp4kexQRuHXwDWQILVK8FZQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 20.12"
|
||||
}
|
||||
},
|
||||
"node_modules/immediate": {
|
||||
"version": "3.0.6",
|
||||
"resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
|
||||
@@ -3782,6 +4020,21 @@
|
||||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/inherits-browser": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/inherits-browser/-/inherits-browser-0.1.0.tgz",
|
||||
"integrity": "sha512-CJHHvW3jQ6q7lzsXPpapLdMx5hDpSF3FSh45pwsj6bKxJJ8Nl8v43i5yXnr3BdfOimGHKyniewQtnAIp3vyJJw==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/internmap": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz",
|
||||
"integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/iobuffer": {
|
||||
"version": "5.4.0",
|
||||
"resolved": "https://registry.npmjs.org/iobuffer/-/iobuffer-5.4.0.tgz",
|
||||
@@ -4009,6 +4262,12 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash": {
|
||||
"version": "4.17.23",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz",
|
||||
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/loose-envify": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
|
||||
@@ -4092,6 +4351,22 @@
|
||||
"node": ">=8.6"
|
||||
}
|
||||
},
|
||||
"node_modules/min-dash": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/min-dash/-/min-dash-5.0.0.tgz",
|
||||
"integrity": "sha512-EGuoBnVL7/Fnv2sqakpX5WGmZehZ3YMmLayT7sM8E9DRU74kkeyMg4Rik1lsOkR2GbFNeBca4/L+UfU6gF0Edw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/min-dom": {
|
||||
"version": "5.3.0",
|
||||
"resolved": "https://registry.npmjs.org/min-dom/-/min-dom-5.3.0.tgz",
|
||||
"integrity": "sha512-0w5FEBgPAyHhmFojW3zxd7we3D+m5XYS3E/06OyvxmbHJoiQVa4Nagj6RWvoAKYRw5xth6cP5TMePc5cR1M9hA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"domify": "^3.0.0",
|
||||
"min-dash": "^5.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/min-indent": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz",
|
||||
@@ -4102,6 +4377,31 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/moddle": {
|
||||
"version": "8.1.0",
|
||||
"resolved": "https://registry.npmjs.org/moddle/-/moddle-8.1.0.tgz",
|
||||
"integrity": "sha512-dBddc1CNuZHgro8nQWwfPZ2BkyLWdnxoNpPu9d+XKPN96DAiiBOeBw527ft++ebDuFez5PMdaR3pgUgoOaUGrA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-dash": "^5.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/moddle-xml": {
|
||||
"version": "12.0.0",
|
||||
"resolved": "https://registry.npmjs.org/moddle-xml/-/moddle-xml-12.0.0.tgz",
|
||||
"integrity": "sha512-NJc2+sCe4tvuGlaUBcoZcYf6j9f+z+qxHOyGm/LB3ZrlJXVPPHoBTg/KXgDRCufdBJhJ3AheFs3QU/abABNzRg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"min-dash": "^5.0.0",
|
||||
"saxen": "^11.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"moddle": ">= 6.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ms": {
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||
@@ -4240,7 +4540,6 @@
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||
"integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
@@ -4256,6 +4555,15 @@
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/object-refs": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/object-refs/-/object-refs-0.4.0.tgz",
|
||||
"integrity": "sha512-6kJqKWryKZmtte6QYvouas0/EIJKPI1/MMIuRsiBlNuhIMfqYTggzX2F1AJ2+cDs288xyi9GL7FyasHINR98BQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/obug": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz",
|
||||
@@ -4286,6 +4594,15 @@
|
||||
"url": "https://github.com/inikulin/parse5?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/path-intersection": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/path-intersection/-/path-intersection-4.1.0.tgz",
|
||||
"integrity": "sha512-urUP6WvhnxbHPdHYl6L7Yrc6+1ny6uOFKPCzPxTSUSYGHG0o94RmI7SvMMaScNAM5RtTf08bg4skc6/kjfne3A==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 14.20"
|
||||
}
|
||||
},
|
||||
"node_modules/path-parse": {
|
||||
"version": "1.0.7",
|
||||
"resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
|
||||
@@ -4555,6 +4872,16 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/preact": {
|
||||
"version": "10.28.4",
|
||||
"resolved": "https://registry.npmjs.org/preact/-/preact-10.28.4.tgz",
|
||||
"integrity": "sha512-uKFfOHWuSNpRFVTnljsCluEFq57OKT+0QdOiQo8XWnQ/pSvg7OpX5eNOejELXJMWy+BwM2nobz0FkvzmnpCNsQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/preact"
|
||||
}
|
||||
},
|
||||
"node_modules/pretty-format": {
|
||||
"version": "27.5.1",
|
||||
"resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz",
|
||||
@@ -4577,6 +4904,23 @@
|
||||
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/prop-types": {
|
||||
"version": "15.8.1",
|
||||
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
|
||||
"integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"loose-envify": "^1.4.0",
|
||||
"object-assign": "^4.1.1",
|
||||
"react-is": "^16.13.1"
|
||||
}
|
||||
},
|
||||
"node_modules/prop-types/node_modules/react-is": {
|
||||
"version": "16.13.1",
|
||||
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
|
||||
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/punycode": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
|
||||
@@ -4661,6 +5005,37 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-smooth": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-4.0.4.tgz",
|
||||
"integrity": "sha512-gnGKTpYwqL0Iii09gHobNolvX4Kiq4PKx6eWBCYYix+8cdw+cGo3do906l1NBPKkSWx1DghC1dlWG9L2uGd61Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fast-equals": "^5.0.1",
|
||||
"prop-types": "^15.8.1",
|
||||
"react-transition-group": "^4.4.5"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-transition-group": {
|
||||
"version": "4.4.5",
|
||||
"resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz",
|
||||
"integrity": "sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.5.5",
|
||||
"dom-helpers": "^5.0.1",
|
||||
"loose-envify": "^1.4.0",
|
||||
"prop-types": "^15.6.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=16.6.0",
|
||||
"react-dom": ">=16.6.0"
|
||||
}
|
||||
},
|
||||
"node_modules/reactflow": {
|
||||
"version": "11.11.4",
|
||||
"resolved": "https://registry.npmjs.org/reactflow/-/reactflow-11.11.4.tgz",
|
||||
@@ -4717,6 +5092,44 @@
|
||||
"node": ">=8.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/recharts": {
|
||||
"version": "2.15.4",
|
||||
"resolved": "https://registry.npmjs.org/recharts/-/recharts-2.15.4.tgz",
|
||||
"integrity": "sha512-UT/q6fwS3c1dHbXv2uFgYJ9BMFHu3fwnd7AYZaEQhXuYQ4hgsxLvsUXzGdKeZrW5xopzDCvuA2N41WJ88I7zIw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"clsx": "^2.0.0",
|
||||
"eventemitter3": "^4.0.1",
|
||||
"lodash": "^4.17.21",
|
||||
"react-is": "^18.3.1",
|
||||
"react-smooth": "^4.0.4",
|
||||
"recharts-scale": "^0.4.4",
|
||||
"tiny-invariant": "^1.3.1",
|
||||
"victory-vendor": "^36.6.8"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/recharts-scale": {
|
||||
"version": "0.4.5",
|
||||
"resolved": "https://registry.npmjs.org/recharts-scale/-/recharts-scale-0.4.5.tgz",
|
||||
"integrity": "sha512-kivNFO+0OcUNu7jQquLXAxz1FIwZj8nrj+YkOKc5694NbjCvcT6aSZiIzNzd2Kul4o4rTto8QVR9lMNtxD4G1w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"decimal.js-light": "^2.4.1"
|
||||
}
|
||||
},
|
||||
"node_modules/recharts/node_modules/react-is": {
|
||||
"version": "18.3.1",
|
||||
"resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz",
|
||||
"integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/redent": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz",
|
||||
@@ -4865,6 +5278,15 @@
|
||||
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/saxen": {
|
||||
"version": "11.0.2",
|
||||
"resolved": "https://registry.npmjs.org/saxen/-/saxen-11.0.2.tgz",
|
||||
"integrity": "sha512-WDb4gqac8uiJzOdOdVpr9NWh9NrJMm7Brn5GX2Poj+mjE/QTXqYQENr8T/mom54dDDgbd3QjwTg23TRHYiWXRA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 20.12"
|
||||
}
|
||||
},
|
||||
"node_modules/saxes": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz",
|
||||
@@ -5160,6 +5582,21 @@
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/tiny-invariant": {
|
||||
"version": "1.3.3",
|
||||
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
|
||||
"integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tiny-svg": {
|
||||
"version": "4.1.4",
|
||||
"resolved": "https://registry.npmjs.org/tiny-svg/-/tiny-svg-4.1.4.tgz",
|
||||
"integrity": "sha512-cBaEACCbouYrQc9RG+eTXnPYosX1Ijqty/I6DdXovwDd89Pwu4jcmpOR7BuFEF9YCcd7/AWwasE0207WMK7hdw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 20"
|
||||
}
|
||||
},
|
||||
"node_modules/tinybench": {
|
||||
"version": "2.9.0",
|
||||
"resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz",
|
||||
@@ -5407,6 +5844,28 @@
|
||||
"uuid": "dist-node/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/victory-vendor": {
|
||||
"version": "36.9.2",
|
||||
"resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.9.2.tgz",
|
||||
"integrity": "sha512-PnpQQMuxlwYdocC8fIJqVXvkeViHYzotI+NJrCuav0ZYFoq912ZHBk3mCeuj+5/VpodOjPe1z0Fk2ihgzlXqjQ==",
|
||||
"license": "MIT AND ISC",
|
||||
"dependencies": {
|
||||
"@types/d3-array": "^3.0.3",
|
||||
"@types/d3-ease": "^3.0.0",
|
||||
"@types/d3-interpolate": "^3.0.1",
|
||||
"@types/d3-scale": "^4.0.2",
|
||||
"@types/d3-shape": "^3.1.0",
|
||||
"@types/d3-time": "^3.0.0",
|
||||
"@types/d3-timer": "^3.0.0",
|
||||
"d3-array": "^3.1.6",
|
||||
"d3-ease": "^3.0.1",
|
||||
"d3-interpolate": "^3.0.1",
|
||||
"d3-scale": "^4.0.2",
|
||||
"d3-shape": "^3.1.0",
|
||||
"d3-time": "^3.0.0",
|
||||
"d3-timer": "^3.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "7.3.1",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz",
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
"test:all": "vitest run && playwright test --project=chromium"
|
||||
},
|
||||
"dependencies": {
|
||||
"bpmn-js": "^18.0.1",
|
||||
"jspdf": "^4.1.0",
|
||||
"jszip": "^3.10.1",
|
||||
"lucide-react": "^0.468.0",
|
||||
@@ -27,6 +26,7 @@
|
||||
"react-dom": "^18.3.1",
|
||||
"reactflow": "^11.11.4",
|
||||
"recharts": "^2.15.0",
|
||||
"fabric": "^6.0.0",
|
||||
"uuid": "^13.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
1454
admin-lehrer/public/fonts/Inter-VariableFont.woff2
Normal file
1454
admin-lehrer/public/fonts/Inter-VariableFont.woff2
Normal file
File diff suppressed because one or more lines are too long
@@ -119,13 +119,6 @@ export const AI_PIPELINE_MODULES: AIModuleLink[] = [
|
||||
* Kein direkter Datenfluss zur Pipeline.
|
||||
*/
|
||||
export const AI_TOOLS_MODULES: AIModuleLink[] = [
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
href: '/ai/llm-compare',
|
||||
description: 'KI-Provider Vergleich & Evaluation',
|
||||
icon: '⚖️',
|
||||
},
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
@@ -212,27 +205,7 @@ export const AI_MODULE_RELATIONS: Record<string, AIModuleLink[]> = {
|
||||
},
|
||||
],
|
||||
// KI-Werkzeuge Relations (Standalone-Tools)
|
||||
'llm-compare': [
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
href: '/ai/test-quality',
|
||||
description: 'Golden Suite & Synthetic Tests',
|
||||
},
|
||||
{
|
||||
id: 'agents',
|
||||
name: 'Agent Management',
|
||||
href: '/ai/agents',
|
||||
description: 'Multi-Agent System',
|
||||
},
|
||||
],
|
||||
'test-quality': [
|
||||
{
|
||||
id: 'llm-compare',
|
||||
name: 'LLM Vergleich',
|
||||
href: '/ai/llm-compare',
|
||||
description: 'KI-Provider vergleichen',
|
||||
},
|
||||
{
|
||||
id: 'klausur-korrektur',
|
||||
name: 'Klausur-Korrektur',
|
||||
|
||||
@@ -1,323 +0,0 @@
|
||||
# =========================================================
|
||||
# BreakPilot Lehrer — KI-Lehrerplattform (Coolify)
|
||||
# =========================================================
|
||||
# Requires: breakpilot-core must be running
|
||||
# Deployed via Coolify. SSL termination handled by Traefik.
|
||||
# External services (managed separately in Coolify):
|
||||
# - PostgreSQL, Qdrant, S3-compatible storage
|
||||
# =========================================================
|
||||
|
||||
networks:
|
||||
breakpilot-network:
|
||||
external: true
|
||||
name: breakpilot-network
|
||||
|
||||
volumes:
|
||||
klausur_uploads:
|
||||
eh_uploads:
|
||||
ocr_labeling:
|
||||
paddle_models:
|
||||
lehrer_backend_data:
|
||||
opensearch_data:
|
||||
|
||||
services:
|
||||
|
||||
# =========================================================
|
||||
# FRONTEND
|
||||
# =========================================================
|
||||
admin-lehrer:
|
||||
build:
|
||||
context: ./admin-lehrer
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
NEXT_PUBLIC_API_URL: ${NEXT_PUBLIC_API_URL:-https://api-lehrer.breakpilot.ai}
|
||||
NEXT_PUBLIC_OLD_ADMIN_URL: ${NEXT_PUBLIC_OLD_ADMIN_URL:-}
|
||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
||||
container_name: bp-lehrer-admin
|
||||
expose:
|
||||
- "3000"
|
||||
volumes:
|
||||
- lehrer_backend_data:/app/data
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
||||
OLLAMA_URL: ${OLLAMA_URL:-}
|
||||
depends_on:
|
||||
backend-lehrer:
|
||||
condition: service_started
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.admin-lehrer.rule=Host(`admin-lehrer.breakpilot.ai`)"
|
||||
- "traefik.http.routers.admin-lehrer.entrypoints=https"
|
||||
- "traefik.http.routers.admin-lehrer.tls=true"
|
||||
- "traefik.http.routers.admin-lehrer.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.admin-lehrer.loadbalancer.server.port=3000"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
studio-v2:
|
||||
build:
|
||||
context: ./studio-v2
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
||||
container_name: bp-lehrer-studio-v2
|
||||
expose:
|
||||
- "3001"
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
depends_on:
|
||||
- backend-lehrer
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.studio.rule=Host(`app.breakpilot.ai`)"
|
||||
- "traefik.http.routers.studio.entrypoints=https"
|
||||
- "traefik.http.routers.studio.tls=true"
|
||||
- "traefik.http.routers.studio.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.studio.loadbalancer.server.port=3001"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
website:
|
||||
build:
|
||||
context: ./website
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
NEXT_PUBLIC_BILLING_API_URL: ${NEXT_PUBLIC_BILLING_API_URL:-https://api-core.breakpilot.ai}
|
||||
NEXT_PUBLIC_APP_URL: ${NEXT_PUBLIC_APP_URL:-https://app.breakpilot.ai}
|
||||
NEXT_PUBLIC_KLAUSUR_SERVICE_URL: ${NEXT_PUBLIC_KLAUSUR_SERVICE_URL:-https://klausur.breakpilot.ai}
|
||||
NEXT_PUBLIC_VOICE_SERVICE_URL: ${NEXT_PUBLIC_VOICE_SERVICE_URL:-wss://voice.breakpilot.ai}
|
||||
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY: ${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY:-}
|
||||
container_name: bp-lehrer-website
|
||||
expose:
|
||||
- "3000"
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
VAST_API_KEY: ${VAST_API_KEY:-}
|
||||
CONTROL_API_KEY: ${CONTROL_API_KEY:-}
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
||||
EDU_SEARCH_URL: ${EDU_SEARCH_URL:-}
|
||||
EDU_SEARCH_API_KEY: ${EDU_SEARCH_API_KEY:-}
|
||||
depends_on:
|
||||
- backend-lehrer
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.website.rule=Host(`www.breakpilot.ai`)"
|
||||
- "traefik.http.routers.website.entrypoints=https"
|
||||
- "traefik.http.routers.website.tls=true"
|
||||
- "traefik.http.routers.website.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.website.loadbalancer.server.port=3000"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# BACKEND
|
||||
# =========================================================
|
||||
backend-lehrer:
|
||||
build:
|
||||
context: ./backend-lehrer
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-backend
|
||||
user: "0:0"
|
||||
expose:
|
||||
- "8001"
|
||||
volumes:
|
||||
- lehrer_backend_data:/app/data
|
||||
environment:
|
||||
PORT: 8001
|
||||
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}?options=-csearch_path%3Dlehrer,core,public
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
ENVIRONMENT: production
|
||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
||||
TROCR_SERVICE_URL: ${TROCR_SERVICE_URL:-}
|
||||
CAMUNDA_URL: ${CAMUNDA_URL:-}
|
||||
VALKEY_URL: redis://bp-core-valkey:6379/0
|
||||
SESSION_TTL_HOURS: ${SESSION_TTL_HOURS:-24}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
DEBUG: "false"
|
||||
ALERTS_AGENT_ENABLED: ${ALERTS_AGENT_ENABLED:-false}
|
||||
VAST_API_KEY: ${VAST_API_KEY:-}
|
||||
VAST_INSTANCE_ID: ${VAST_INSTANCE_ID:-}
|
||||
CONTROL_API_KEY: ${CONTROL_API_KEY:-}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
||||
OLLAMA_ENABLED: ${OLLAMA_ENABLED:-false}
|
||||
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-}
|
||||
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-}
|
||||
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-}
|
||||
OLLAMA_TIMEOUT: ${OLLAMA_TIMEOUT:-120}
|
||||
GAME_USE_DATABASE: ${GAME_USE_DATABASE:-true}
|
||||
GAME_REQUIRE_AUTH: ${GAME_REQUIRE_AUTH:-true}
|
||||
GAME_REQUIRE_BILLING: ${GAME_REQUIRE_BILLING:-true}
|
||||
GAME_LLM_MODEL: ${GAME_LLM_MODEL:-}
|
||||
SMTP_HOST: ${SMTP_HOST}
|
||||
SMTP_PORT: ${SMTP_PORT:-587}
|
||||
SMTP_USERNAME: ${SMTP_USERNAME}
|
||||
SMTP_PASSWORD: ${SMTP_PASSWORD}
|
||||
SMTP_FROM_NAME: ${SMTP_FROM_NAME:-BreakPilot}
|
||||
SMTP_FROM_ADDR: ${SMTP_FROM_ADDR:-noreply@breakpilot.ai}
|
||||
RAG_SERVICE_URL: http://bp-core-rag-service:8097
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.backend-lehrer.rule=Host(`api-lehrer.breakpilot.ai`)"
|
||||
- "traefik.http.routers.backend-lehrer.entrypoints=https"
|
||||
- "traefik.http.routers.backend-lehrer.tls=true"
|
||||
- "traefik.http.routers.backend-lehrer.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.backend-lehrer.loadbalancer.server.port=8001"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# MICROSERVICES
|
||||
# =========================================================
|
||||
klausur-service:
|
||||
build:
|
||||
context: ./klausur-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-klausur-service
|
||||
expose:
|
||||
- "8086"
|
||||
volumes:
|
||||
- klausur_uploads:/app/uploads
|
||||
- eh_uploads:/app/eh-uploads
|
||||
- ocr_labeling:/app/ocr-labeling
|
||||
- paddle_models:/root/.paddlex
|
||||
environment:
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
SCHOOL_SERVICE_URL: http://school-service:8084
|
||||
ENVIRONMENT: production
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
|
||||
EMBEDDING_SERVICE_URL: http://bp-core-embedding-service:8087
|
||||
QDRANT_URL: ${QDRANT_URL}
|
||||
MINIO_ENDPOINT: ${S3_ENDPOINT}
|
||||
MINIO_ACCESS_KEY: ${S3_ACCESS_KEY}
|
||||
MINIO_SECRET_KEY: ${S3_SECRET_KEY}
|
||||
MINIO_BUCKET: ${S3_BUCKET:-breakpilot-rag}
|
||||
MINIO_SECURE: ${S3_SECURE:-true}
|
||||
PADDLEOCR_SERVICE_URL: ${PADDLEOCR_SERVICE_URL:-}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
||||
OLLAMA_ENABLED: ${OLLAMA_ENABLED:-false}
|
||||
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-}
|
||||
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-}
|
||||
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-}
|
||||
RAG_SERVICE_URL: http://bp-core-rag-service:8097
|
||||
depends_on:
|
||||
school-service:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:8086/health"]
|
||||
interval: 30s
|
||||
timeout: 30s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.klausur.rule=Host(`klausur.breakpilot.ai`)"
|
||||
- "traefik.http.routers.klausur.entrypoints=https"
|
||||
- "traefik.http.routers.klausur.tls=true"
|
||||
- "traefik.http.routers.klausur.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.klausur.loadbalancer.server.port=8086"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
school-service:
|
||||
build:
|
||||
context: ./school-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-school-service
|
||||
expose:
|
||||
- "8084"
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
PORT: 8084
|
||||
ENVIRONMENT: production
|
||||
ALLOWED_ORIGINS: "*"
|
||||
LLM_GATEWAY_URL: http://backend-lehrer:8001/llm
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# EDU SEARCH
|
||||
# =========================================================
|
||||
opensearch:
|
||||
image: opensearchproject/opensearch:2.11.1
|
||||
container_name: bp-lehrer-opensearch
|
||||
environment:
|
||||
- cluster.name=edu-search-cluster
|
||||
- node.name=opensearch-node1
|
||||
- discovery.type=single-node
|
||||
- bootstrap.memory_lock=true
|
||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
|
||||
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD:-Admin123!}
|
||||
- plugins.security.disabled=true
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
nofile:
|
||||
soft: 65536
|
||||
hard: 65536
|
||||
volumes:
|
||||
- opensearch_data:/usr/share/opensearch/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -s http://localhost:9200 >/dev/null || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 60s
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
edu-search-service:
|
||||
build:
|
||||
context: ./edu-search-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-edu-search
|
||||
expose:
|
||||
- "8088"
|
||||
environment:
|
||||
PORT: 8088
|
||||
OPENSEARCH_URL: http://opensearch:9200
|
||||
OPENSEARCH_USERNAME: admin
|
||||
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-Admin123!}
|
||||
INDEX_NAME: bp_documents_v1
|
||||
EDU_SEARCH_API_KEY: ${EDU_SEARCH_API_KEY:-}
|
||||
USER_AGENT: "BreakpilotEduCrawler/1.0 (+contact: security@breakpilot.com)"
|
||||
RATE_LIMIT_PER_SEC: "0.2"
|
||||
MAX_DEPTH: "4"
|
||||
MAX_PAGES_PER_RUN: "500"
|
||||
DB_HOST: ${POSTGRES_HOST}
|
||||
DB_PORT: ${POSTGRES_PORT:-5432}
|
||||
DB_USER: ${POSTGRES_USER}
|
||||
DB_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
DB_NAME: ${POSTGRES_DB}
|
||||
DB_SSLMODE: disable
|
||||
STAFF_CRAWLER_EMAIL: crawler@breakpilot.de
|
||||
depends_on:
|
||||
opensearch:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8088/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 3s
|
||||
start_period: 10s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
@@ -15,11 +15,24 @@ volumes:
|
||||
eh_uploads:
|
||||
ocr_labeling:
|
||||
paddle_models:
|
||||
lighton_models:
|
||||
paddleocr_models:
|
||||
transcription_models:
|
||||
transcription_temp:
|
||||
lehrer_backend_data:
|
||||
opensearch_data:
|
||||
# Communication (Jitsi + Matrix)
|
||||
synapse_data:
|
||||
synapse_db_data:
|
||||
jitsi_web_config:
|
||||
jitsi_web_crontabs:
|
||||
jitsi_transcripts:
|
||||
jitsi_prosody_config:
|
||||
jitsi_prosody_plugins:
|
||||
jitsi_jicofo_config:
|
||||
jitsi_jvb_config:
|
||||
# Voice
|
||||
voice_session_data:
|
||||
|
||||
services:
|
||||
|
||||
@@ -154,7 +167,6 @@ services:
|
||||
CONSENT_SERVICE_URL: http://bp-core-consent-service:8081
|
||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
||||
TROCR_SERVICE_URL: http://paddleocr-service:8095
|
||||
CAMUNDA_URL: http://bp-core-camunda:8080
|
||||
VALKEY_URL: redis://bp-core-valkey:6379/0
|
||||
SESSION_TTL_HOURS: ${SESSION_TTL_HOURS:-24}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
@@ -209,6 +221,7 @@ services:
|
||||
- eh_uploads:/app/eh-uploads
|
||||
- ocr_labeling:/app/ocr-labeling
|
||||
- paddle_models:/root/.paddlex
|
||||
- lighton_models:/root/.cache/huggingface
|
||||
environment:
|
||||
JWT_SECRET: ${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}
|
||||
BACKEND_URL: http://backend-lehrer:8001
|
||||
@@ -223,6 +236,8 @@ services:
|
||||
MINIO_BUCKET: ${MINIO_BUCKET:-breakpilot-rag}
|
||||
MINIO_SECURE: "false"
|
||||
PADDLEOCR_SERVICE_URL: http://paddleocr-service:8095
|
||||
PADDLEOCR_REMOTE_URL: ${PADDLEOCR_REMOTE_URL:-https://hetzner.meghsakha.com:8095}
|
||||
PADDLEOCR_API_KEY: ${PADDLEOCR_API_KEY:-}
|
||||
VAULT_ADDR: http://bp-core-vault:8200
|
||||
VAULT_TOKEN: ${VAULT_TOKEN:-breakpilot-dev-token}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
@@ -231,6 +246,12 @@ services:
|
||||
OLLAMA_DEFAULT_MODEL: ${OLLAMA_DEFAULT_MODEL:-llama3.2}
|
||||
OLLAMA_VISION_MODEL: ${OLLAMA_VISION_MODEL:-llama3.2-vision}
|
||||
OLLAMA_CORRECTION_MODEL: ${OLLAMA_CORRECTION_MODEL:-llama3.2}
|
||||
OLLAMA_REVIEW_MODEL: ${OLLAMA_REVIEW_MODEL:-qwen3:0.6b}
|
||||
OLLAMA_REVIEW_BATCH_SIZE: ${OLLAMA_REVIEW_BATCH_SIZE:-20}
|
||||
REVIEW_ENGINE: ${REVIEW_ENGINE:-spell}
|
||||
OCR_ENGINE: ${OCR_ENGINE:-auto}
|
||||
OLLAMA_HTR_MODEL: ${OLLAMA_HTR_MODEL:-qwen2.5vl:32b}
|
||||
HTR_FALLBACK_MODEL: ${HTR_FALLBACK_MODEL:-trocr-large}
|
||||
RAG_SERVICE_URL: http://bp-core-rag-service:8097
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
@@ -373,6 +394,216 @@ services:
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# VOICE SERVICE
|
||||
# =========================================================
|
||||
voice-service:
|
||||
build:
|
||||
context: ./voice-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-lehrer-voice-service
|
||||
platform: linux/arm64
|
||||
expose:
|
||||
- "8091"
|
||||
volumes:
|
||||
- voice_session_data:/app/data/sessions
|
||||
environment:
|
||||
PORT: 8091
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-breakpilot}:${POSTGRES_PASSWORD:-breakpilot123}@bp-core-postgres:5432/${POSTGRES_DB:-breakpilot_db}
|
||||
VALKEY_URL: redis://bp-core-valkey:6379/0
|
||||
KLAUSUR_SERVICE_URL: http://klausur-service:8086
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
|
||||
OLLAMA_VOICE_MODEL: ${OLLAMA_VOICE_MODEL:-llama3.2}
|
||||
ENVIRONMENT: ${ENVIRONMENT:-development}
|
||||
JWT_SECRET: ${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
depends_on:
|
||||
core-health-check:
|
||||
condition: service_completed_successfully
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:8091/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
start_period: 60s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# COMMUNICATION: Jitsi Meet
|
||||
# =========================================================
|
||||
jitsi-web:
|
||||
image: jitsi/web:stable-9823
|
||||
container_name: bp-lehrer-jitsi-web
|
||||
expose:
|
||||
- "80"
|
||||
volumes:
|
||||
- jitsi_web_config:/config
|
||||
- jitsi_web_crontabs:/var/spool/cron/crontabs
|
||||
- jitsi_transcripts:/usr/share/jitsi-meet/transcripts
|
||||
environment:
|
||||
ENABLE_XMPP_WEBSOCKET: "true"
|
||||
ENABLE_COLIBRI_WEBSOCKET: "true"
|
||||
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
|
||||
XMPP_BOSH_URL_BASE: http://jitsi-xmpp:5280
|
||||
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
|
||||
XMPP_GUEST_DOMAIN: ${XMPP_GUEST_DOMAIN:-guest.meet.jitsi}
|
||||
TZ: ${TZ:-Europe/Berlin}
|
||||
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
|
||||
JICOFO_AUTH_USER: focus
|
||||
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
|
||||
ENABLE_GUESTS: "true"
|
||||
ENABLE_RECORDING: "true"
|
||||
ENABLE_LIVESTREAMING: "false"
|
||||
DISABLE_HTTPS: "true"
|
||||
APP_NAME: "BreakPilot Meet"
|
||||
NATIVE_APP_NAME: "BreakPilot Meet"
|
||||
PROVIDER_NAME: "BreakPilot"
|
||||
depends_on:
|
||||
- jitsi-xmpp
|
||||
networks:
|
||||
breakpilot-network:
|
||||
aliases:
|
||||
- meet.jitsi
|
||||
|
||||
jitsi-xmpp:
|
||||
image: jitsi/prosody:stable-9823
|
||||
container_name: bp-lehrer-jitsi-xmpp
|
||||
volumes:
|
||||
- jitsi_prosody_config:/config
|
||||
- jitsi_prosody_plugins:/prosody-plugins-custom
|
||||
environment:
|
||||
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
|
||||
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
|
||||
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
|
||||
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
|
||||
XMPP_GUEST_DOMAIN: ${XMPP_GUEST_DOMAIN:-guest.meet.jitsi}
|
||||
XMPP_RECORDER_DOMAIN: ${XMPP_RECORDER_DOMAIN:-recorder.meet.jitsi}
|
||||
XMPP_CROSS_DOMAIN: "true"
|
||||
TZ: ${TZ:-Europe/Berlin}
|
||||
JICOFO_AUTH_USER: focus
|
||||
JICOFO_AUTH_PASSWORD: ${JICOFO_AUTH_PASSWORD:-jicofo_secret}
|
||||
JVB_AUTH_USER: jvb
|
||||
JVB_AUTH_PASSWORD: ${JVB_AUTH_PASSWORD:-jvb_secret}
|
||||
JIBRI_XMPP_USER: jibri
|
||||
JIBRI_XMPP_PASSWORD: ${JIBRI_XMPP_PASSWORD:-jibri_secret}
|
||||
JIBRI_RECORDER_USER: recorder
|
||||
JIBRI_RECORDER_PASSWORD: ${JIBRI_RECORDER_PASSWORD:-recorder_secret}
|
||||
LOG_LEVEL: ${XMPP_LOG_LEVEL:-warn}
|
||||
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
|
||||
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
|
||||
ENABLE_GUESTS: "true"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
breakpilot-network:
|
||||
aliases:
|
||||
- xmpp.meet.jitsi
|
||||
|
||||
jitsi-jicofo:
|
||||
image: jitsi/jicofo:stable-9823
|
||||
container_name: bp-lehrer-jitsi-jicofo
|
||||
volumes:
|
||||
- jitsi_jicofo_config:/config
|
||||
environment:
|
||||
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
|
||||
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
|
||||
XMPP_MUC_DOMAIN: ${XMPP_MUC_DOMAIN:-muc.meet.jitsi}
|
||||
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
|
||||
XMPP_SERVER: jitsi-xmpp
|
||||
JICOFO_AUTH_USER: focus
|
||||
JICOFO_AUTH_PASSWORD: ${JICOFO_AUTH_PASSWORD:-jicofo_secret}
|
||||
TZ: ${TZ:-Europe/Berlin}
|
||||
ENABLE_AUTH: ${JITSI_ENABLE_AUTH:-false}
|
||||
AUTH_TYPE: internal
|
||||
ENABLE_AUTO_OWNER: "true"
|
||||
depends_on:
|
||||
- jitsi-xmpp
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
jitsi-jvb:
|
||||
image: jitsi/jvb:stable-9823
|
||||
container_name: bp-lehrer-jitsi-jvb
|
||||
ports:
|
||||
- "10000:10000/udp"
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- jitsi_jvb_config:/config
|
||||
environment:
|
||||
XMPP_DOMAIN: ${XMPP_DOMAIN:-meet.jitsi}
|
||||
XMPP_AUTH_DOMAIN: ${XMPP_AUTH_DOMAIN:-auth.meet.jitsi}
|
||||
XMPP_INTERNAL_MUC_DOMAIN: ${XMPP_INTERNAL_MUC_DOMAIN:-internal-muc.meet.jitsi}
|
||||
XMPP_SERVER: jitsi-xmpp
|
||||
JVB_AUTH_USER: jvb
|
||||
JVB_AUTH_PASSWORD: ${JVB_AUTH_PASSWORD:-jvb_secret}
|
||||
JVB_PORT: 10000
|
||||
JVB_STUN_SERVERS: ${JVB_STUN_SERVERS:-stun.l.google.com:19302}
|
||||
TZ: ${TZ:-Europe/Berlin}
|
||||
PUBLIC_URL: ${JITSI_PUBLIC_URL:-https://macmini:8443}
|
||||
COLIBRI_REST_ENABLED: "true"
|
||||
ENABLE_COLIBRI_WEBSOCKET: "true"
|
||||
depends_on:
|
||||
- jitsi-xmpp
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# COMMUNICATION: Matrix/Synapse
|
||||
# =========================================================
|
||||
synapse-db:
|
||||
image: postgres:16-alpine
|
||||
container_name: bp-lehrer-synapse-db
|
||||
profiles: [chat]
|
||||
environment:
|
||||
POSTGRES_USER: synapse
|
||||
POSTGRES_PASSWORD: ${SYNAPSE_DB_PASSWORD:-synapse_secret}
|
||||
POSTGRES_DB: synapse
|
||||
POSTGRES_INITDB_ARGS: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
|
||||
volumes:
|
||||
- synapse_db_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U synapse"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
synapse:
|
||||
image: matrixdotorg/synapse:latest
|
||||
container_name: bp-lehrer-synapse
|
||||
profiles: [chat]
|
||||
ports:
|
||||
- "8008:8008"
|
||||
- "8448:8448"
|
||||
volumes:
|
||||
- synapse_data:/data
|
||||
environment:
|
||||
SYNAPSE_SERVER_NAME: ${SYNAPSE_SERVER_NAME:-macmini}
|
||||
SYNAPSE_REPORT_STATS: "no"
|
||||
SYNAPSE_NO_TLS: "true"
|
||||
SYNAPSE_ENABLE_REGISTRATION: ${SYNAPSE_ENABLE_REGISTRATION:-true}
|
||||
SYNAPSE_LOG_LEVEL: ${SYNAPSE_LOG_LEVEL:-WARNING}
|
||||
UID: "1000"
|
||||
GID: "1000"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:8008/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
start_period: 30s
|
||||
retries: 3
|
||||
depends_on:
|
||||
synapse-db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# EDU SEARCH
|
||||
# =========================================================
|
||||
|
||||
114
docs-src/services/klausur-service/Chunk-Browser.md
Normal file
114
docs-src/services/klausur-service/Chunk-Browser.md
Normal file
@@ -0,0 +1,114 @@
|
||||
# Chunk-Browser
|
||||
|
||||
## Uebersicht
|
||||
|
||||
Der Chunk-Browser ermoeglicht das sequenzielle Durchblaettern aller Chunks in einer Qdrant-Collection. Er ist als Tab "Chunk-Browser" auf der RAG-Seite (`/ai/rag`) verfuegbar.
|
||||
|
||||
**URL:** `https://macmini:3002/ai/rag` → Tab "Chunk-Browser"
|
||||
|
||||
---
|
||||
|
||||
## Funktionen
|
||||
|
||||
### Collection-Auswahl
|
||||
Dropdown mit allen verfuegbaren Compliance-Collections:
|
||||
|
||||
- `bp_compliance_gesetze`
|
||||
- `bp_compliance_ce`
|
||||
- `bp_compliance_datenschutz`
|
||||
- `bp_dsfa_corpus`
|
||||
- `bp_compliance_recht`
|
||||
- `bp_legal_templates`
|
||||
- `bp_compliance_gdpr`
|
||||
- `bp_compliance_schulrecht`
|
||||
- `bp_dsfa_templates`
|
||||
- `bp_dsfa_risks`
|
||||
|
||||
### Seitenweise Navigation
|
||||
- 20 Chunks pro Seite
|
||||
- Zurueck/Weiter-Buttons
|
||||
- Seitennummer und Chunk-Zaehler
|
||||
- Cursor-basierte Pagination via Qdrant Scroll API
|
||||
|
||||
### Textsuche
|
||||
- Filtert Chunks auf der aktuell geladenen Seite
|
||||
- Treffer werden gelb hervorgehoben
|
||||
- Suche ueber den Chunk-Text (payload.text, payload.content, payload.chunk_text)
|
||||
|
||||
### Chunk-Details
|
||||
- Klick auf einen Chunk klappt alle Metadaten aus
|
||||
- Zeigt: regulation_code, article, language, source, licence, etc.
|
||||
- Chunks haben eine fortlaufende Nummer (#1, #2, ...)
|
||||
|
||||
### Integration mit Regulierungen-Tab
|
||||
Der Button "In Chunks suchen" bei jeder Regulierung wechselt zum Chunk-Browser mit:
|
||||
- Vorauswahl der richtigen Collection
|
||||
- Vorausgefuelltem Suchbegriff (Regulierungsname)
|
||||
|
||||
---
|
||||
|
||||
## API
|
||||
|
||||
### Scroll-Endpoint (API Proxy)
|
||||
|
||||
```
|
||||
GET /api/legal-corpus?action=scroll&collection=bp_compliance_ce&limit=20&offset={cursor}
|
||||
```
|
||||
|
||||
**Parameter:**
|
||||
|
||||
| Parameter | Typ | Beschreibung |
|
||||
|-----------|-----|--------------|
|
||||
| `collection` | string | Qdrant Collection Name |
|
||||
| `limit` | number | Chunks pro Seite (max 100) |
|
||||
| `offset` | string | Cursor fuer naechste Seite (optional) |
|
||||
| `text_search` | string | Textsuche-Filter (optional) |
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"chunks": [
|
||||
{
|
||||
"id": "uuid",
|
||||
"text": "...",
|
||||
"regulation_code": "GDPR",
|
||||
"article": "Art. 5",
|
||||
"language": "de"
|
||||
}
|
||||
],
|
||||
"next_offset": "uuid-or-null",
|
||||
"total_in_page": 20
|
||||
}
|
||||
```
|
||||
|
||||
### Collection-Count-Endpoint
|
||||
|
||||
```
|
||||
GET /api/legal-corpus?action=collection-count&collection=bp_compliance_ce
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"count": 12345
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Technische Details
|
||||
|
||||
- Der API-Proxy spricht direkt mit Qdrant (Port 6333) via dessen `POST /collections/{name}/points/scroll` Endpoint
|
||||
- Kein Embedding oder rag-service erforderlich
|
||||
- Textsuche ist client-seitig (kein Embedding noetig)
|
||||
- Pagination ist cursor-basiert (Qdrant `next_page_offset`)
|
||||
|
||||
---
|
||||
|
||||
## Weitere Features auf der RAG-Seite
|
||||
|
||||
### Originalquelle-Links
|
||||
Jede Regulierung in der Tabelle hat einen "Originalquelle"-Link zum offiziellen Dokument (EUR-Lex, gesetze-im-internet.de, etc.). Definiert in `REGULATION_SOURCES` (88 Eintraege).
|
||||
|
||||
### Low-Chunk-Warnung
|
||||
Regulierungen mit weniger als 10 Chunks, aber einem erwarteten Wert >= 10 werden mit einem Amber-Warnsymbol markiert. Dies hilft, fehlgeschlagene oder unvollstaendige Ingestions zu erkennen.
|
||||
1559
docs-src/services/klausur-service/OCR-Pipeline.md
Normal file
1559
docs-src/services/klausur-service/OCR-Pipeline.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,24 +8,15 @@ RUN npm install
|
||||
COPY frontend/ ./
|
||||
RUN npm run build
|
||||
|
||||
# Production stage
|
||||
FROM python:3.11-slim
|
||||
# Production stage — uses pre-built base with Tesseract + Python deps.
|
||||
# Base image contains: python:3.11-slim + tesseract-ocr + all pip packages.
|
||||
# Rebuild base only when requirements.txt or system deps change:
|
||||
# docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
|
||||
FROM klausur-base:latest
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies (incl. Tesseract OCR for bounding-box extraction)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-deu \
|
||||
tesseract-ocr-eng \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies
|
||||
COPY backend/requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy backend code
|
||||
# Copy backend code (this is the only layer that changes on code edits)
|
||||
COPY backend/ ./
|
||||
|
||||
# Copy built frontend to the expected path
|
||||
|
||||
27
klausur-service/Dockerfile.base
Normal file
27
klausur-service/Dockerfile.base
Normal file
@@ -0,0 +1,27 @@
|
||||
# Base image with system dependencies + Python packages.
|
||||
# These change rarely — build once, then reuse on every --no-cache build of the service image.
|
||||
#
|
||||
# Rebuild manually when requirements.txt or system deps change:
|
||||
# docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
|
||||
#
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# System dependencies (Tesseract OCR, curl for healthcheck)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-deu \
|
||||
tesseract-ocr-eng \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
fonts-liberation \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Python dependencies
|
||||
COPY backend/requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Clean up pip cache
|
||||
RUN rm -rf /root/.cache/pip
|
||||
471
klausur-service/backend/cv_box_detect.py
Normal file
471
klausur-service/backend/cv_box_detect.py
Normal file
@@ -0,0 +1,471 @@
|
||||
"""
|
||||
Embedded box detection and page zone splitting for the CV vocabulary pipeline.
|
||||
|
||||
Detects boxes (grammar tips, exercises, etc.) that span the page width and
|
||||
interrupt the normal column layout. Splits the page into vertical zones so
|
||||
that column detection can run independently per zone.
|
||||
|
||||
Two-stage algorithm (both run, results merged):
|
||||
1. Morphological line detection — finds bordered boxes via horizontal lines.
|
||||
2. Background shading detection — finds shaded/colored boxes via median-blur
|
||||
background analysis. Works for colored (blue, green) and grayscale
|
||||
(gray shading on B/W scans) boxes.
|
||||
|
||||
License: Apache 2.0 (commercial use permitted)
|
||||
PRIVACY: All processing happens locally.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import DetectedBox, PageZone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = [
|
||||
"detect_boxes",
|
||||
"split_page_into_zones",
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stage 1: Morphological line detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_boxes_by_lines(
    gray: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Detect bordered boxes from pairs of long horizontal rules.

    The page is binarised, opened with a wide 1-px-high kernel so that only
    long horizontal strokes survive, and projected row by row.  Consecutive
    rows that carry enough stroke pixels form a "line segment"; segments are
    then paired greedily (first fit, top to bottom) into boxes.

    Args:
        gray: Grayscale page image.
        content_x: Left edge of the content area (also used as box x).
        content_w: Width of the content area (also used as box width).
        content_y: Accepted for signature symmetry; not read here.
        content_h: Height of the content area; caps the box height at 70%.

    Returns:
        One DetectedBox (confidence 0.8) per paired top/bottom rule, in the
        order the top rules were encountered.
    """
    page_h, _ = gray.shape[:2]

    # Inverse threshold: pixels at or below 180 become foreground (255).
    _, ink = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)

    # Opening with a (kernel_w x 1) rectangle keeps only horizontal runs that
    # are at least kernel_w wide: half the content width, never below 50 px.
    rule_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (max(50, content_w // 2), 1)
    )
    rules = cv2.morphologyEx(ink, cv2.MORPH_OPEN, rule_kernel)

    # A row counts as part of a rule when at least 30% of the content width
    # is covered by surviving stroke pixels.
    row_counts = np.sum(rules[:, content_x:content_x + content_w] > 0, axis=1)
    is_rule_row = row_counts >= content_w * 0.30

    # Run-length detection: pad with False on both sides so every run has a
    # rising and a falling edge; end indices are exclusive.
    padded = np.concatenate(([False], is_rule_row, [False])).astype(np.int8)
    edges = np.diff(padded)
    run_starts = np.flatnonzero(edges == 1)
    run_ends = np.flatnonzero(edges == -1)
    line_segments: List[Tuple[int, int]] = [
        (int(start), int(end)) for start, end in zip(run_starts, run_ends)
    ]

    segment_count = len(line_segments)
    if segment_count < 2:
        return []

    # Accepted box heights: 30 px up to 70% of the content height.
    shortest = 30
    tallest = int(content_h * 0.70)

    found: List[DetectedBox] = []
    paired = set()
    for top_idx in range(segment_count):
        if top_idx in paired:
            continue
        top_start, top_end = line_segments[top_idx]

        for bot_idx in range(top_idx + 1, segment_count):
            if bot_idx in paired:
                continue
            bot_start, bot_end = line_segments[bot_idx]

            # Box spans from the top of the upper rule to the bottom of the
            # lower rule.
            span = bot_end - top_start
            if not (shortest <= span <= tallest):
                continue

            found.append(DetectedBox(
                x=content_x,
                y=top_start,
                width=content_w,
                height=span,
                confidence=0.8,
                # Thicker of the two rules serves as the border estimate.
                border_thickness=max(top_end - top_start, bot_end - bot_start),
            ))
            paired.update((top_idx, bot_idx))
            break  # this top rule is consumed; continue with the next one

    return found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stage 2: Background shading detection (color + grayscale)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_boxes_by_shading(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Detect borderless boxes by their shaded or tinted background.

    A large median blur wipes out text strokes so only the background tone
    remains.  Pixels that are clearly darker than the page background, or
    that carry noticeable saturation, are grouped into contours; contours
    passing the size, rectangularity and shading checks become boxes.

    Args:
        img_bgr: BGR page image.
        content_x: Accepted for signature symmetry; not read here.
        content_w: Content width; drives the area and width thresholds.
        content_y: Accepted for signature symmetry; not read here.
        content_h: Content height; caps the box height at 70%.

    Returns:
        DetectedBox list (confidence 0.7 for tinted, 0.6 for gray-shaded
        regions, border_thickness 0) in contour order.
    """
    img_h, img_w = img_bgr.shape[:2]

    # 31-px median blur: large enough to remove text, keeps flat background.
    smooth = cv2.medianBlur(img_bgr, 31)
    smooth_gray = cv2.cvtColor(smooth, cv2.COLOR_BGR2GRAY)
    smooth_hsv = cv2.cvtColor(smooth, cv2.COLOR_BGR2HSV)
    saturation = smooth_hsv[:, :, 1]

    # Page background = median gray of the top-left and top-right corners.
    patch = max(20, min(img_h // 10, img_w // 10))
    page_bg = float(np.median(np.concatenate([
        smooth_gray[:patch, :patch].ravel(),
        smooth_gray[:patch, -patch:].ravel(),
    ])))

    # Candidate pixels: at least 30 levels darker than the page (threshold
    # never below 150), or saturation above 20.
    darker = (smooth_gray < max(page_bg - 30, 150)).astype(np.uint8) * 255
    tinted = (saturation > 20).astype(np.uint8) * 255
    mask = cv2.bitwise_or(darker, tinted)

    # Close small gaps first, then open to drop isolated specks.
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_RECT, (25, 10))
    )
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    )

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Size limits: boxes may be as narrow as 25% of the content width.
    min_area = content_w * 30
    min_w = content_w * 0.25
    min_h = 25
    max_h = int(content_h * 0.70)

    boxes: List[DetectedBox] = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if area < min_area:
            continue

        bx, by, bw, bh = cv2.boundingRect(contour)
        if bw < min_w:
            continue
        if not (min_h <= bh <= max_h):
            continue

        # Rectangularity: the contour must fill at least half of its
        # bounding rectangle.
        rect_area = bw * bh
        if rect_area > 0 and area / rect_area < 0.5:
            continue

        roi_gray = smooth_gray[by:by + bh, bx:bx + bw]
        if roi_gray.size == 0:
            continue
        roi_sat = saturation[by:by + bh, bx:bx + bw]

        # Re-check on the bounding rectangle that the region really differs
        # from the page background (darker by > 15 or median saturation > 15).
        is_shaded = float(np.median(roi_gray)) < (page_bg - 15)
        is_colored = float(np.median(roi_sat)) > 15
        if not (is_shaded or is_colored):
            continue

        boxes.append(DetectedBox(
            x=bx,
            y=by,
            width=bw,
            height=bh,
            confidence=0.7 if is_colored else 0.6,
            border_thickness=0,
        ))

    return boxes
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _validate_box(
    box: DetectedBox,
    gray: np.ndarray,
    content_w: int,
    content_h: int,
    median_row_gap: int,
) -> bool:
    """Decide whether a candidate box is plausible content rather than noise.

    Args:
        box: Candidate with x, y, width and height attributes.
        gray: Grayscale page image used for the ink-density check.
        content_w: Content width; the box must cover at least 25% of it.
        content_h: Content height; the box may cover at most 70% of it.
        median_row_gap: Median gap between text rows; when positive, boxes
            shorter than three gaps are rejected (likely row separators).

    Returns:
        True when every size check passes and at least 0.2% of the pixels
        inside the (image-clipped) box are darker than 128.
    """
    if box.width < content_w * 0.25:
        return False

    if box.height < 25 or box.height > content_h * 0.70:
        return False

    # Table-row separators produce thin "boxes"; require three row gaps.
    if median_row_gap > 0 and box.height < median_row_gap * 3:
        return False

    # Clip the box to the image before sampling pixels.
    img_h, img_w = gray.shape[:2]
    top = max(0, box.y)
    bottom = min(img_h, box.y + box.height)
    left = max(0, box.x)
    right = min(img_w, box.x + box.width)

    region = gray[top:bottom, left:right]
    if region.size == 0:
        return False

    # A nearly blank interior means there is no real content in the box.
    ink_ratio = np.sum(region < 128) / region.size
    return not (ink_ratio < 0.002)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API: detect_boxes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _merge_overlapping_boxes(boxes: List[DetectedBox]) -> List[DetectedBox]:
    """Drop duplicate detections of the same box.

    Boxes are visited from largest to smallest area.  Two boxes count as
    duplicates when their intersection covers more than 50% of the smaller
    one; of such a pair the box with the strictly higher confidence is kept,
    and on equal confidence the earlier (larger) one wins.

    Args:
        boxes: Candidate boxes from all detection stages.

    Returns:
        The surviving boxes in area-descending order.  Inputs with zero or
        one element are returned unchanged (same list object).
    """
    if len(boxes) <= 1:
        return boxes

    ordered = sorted(boxes, key=lambda b: b.width * b.height, reverse=True)
    alive = [True] * len(ordered)

    for i, first in enumerate(ordered):
        if not alive[i]:
            continue

        for j in range(i + 1, len(ordered)):
            if not alive[j]:
                continue
            second = ordered[j]

            # Extent of the intersection rectangle along each axis.
            left = max(first.x, second.x)
            top = max(first.y, second.y)
            right = min(first.x + first.width, second.x + second.width)
            bottom = min(first.y + first.height, second.y + second.height)
            if right <= left or bottom <= top:
                continue  # disjoint

            shared = (right - left) * (bottom - top)
            smaller = min(first.width * first.height, second.width * second.height)
            if not (smaller > 0 and shared / smaller > 0.50):
                continue

            if second.confidence > first.confidence:
                # The outer box loses; stop comparing it against others.
                alive[i] = False
                break
            alive[j] = False

    return [candidate for candidate, kept in zip(ordered, alive) if kept]
|
||||
|
||||
|
||||
def detect_boxes(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
    median_row_gap: int = 0,
) -> List[DetectedBox]:
    """Find embedded boxes on a page image.

    Both detectors (border lines and background shading) always run; their
    candidates are concatenated, de-duplicated and validated.

    Args:
        img_bgr: BGR page image.
        content_x: Left edge of the content area.
        content_w: Width of the content area.
        content_y: Top edge of the content area.
        content_h: Height of the content area.
        median_row_gap: Median text-row gap, forwarded to validation so that
            table-row separators can be rejected (0 disables that check).

    Returns:
        Validated boxes ordered by ascending y.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    bounds = (content_x, content_w, content_y, content_h)

    # Stage 1: bordered boxes; Stage 2: shaded / tinted boxes.
    line_boxes = _detect_boxes_by_lines(gray, *bounds)
    shade_boxes = _detect_boxes_by_shading(img_bgr, *bounds)

    logger.debug("BoxDetect: %d line-based, %d shading-based candidates",
                 len(line_boxes), len(shade_boxes))

    merged = _merge_overlapping_boxes(line_boxes + shade_boxes)

    # Keep only plausible boxes, top to bottom (sorted() is stable, like
    # list.sort()).
    validated = sorted(
        (
            candidate for candidate in merged
            if _validate_box(candidate, gray, content_w, content_h, median_row_gap)
        ),
        key=lambda b: b.y,
    )

    if not validated:
        logger.debug("BoxDetect: no boxes detected")
    else:
        logger.info("BoxDetect: %d box(es) detected (line=%d, shade=%d, merged=%d)",
                    len(validated), len(line_boxes), len(shade_boxes), len(merged))

    return validated
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Zone Splitting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def split_page_into_zones(
    content_x: int,
    content_y: int,
    content_w: int,
    content_h: int,
    boxes: List[DetectedBox],
    min_zone_height: int = 40,
) -> List[PageZone]:
    """Split the content area into vertical zones around detected boxes.

    Every box becomes a 'box' zone with the box's own geometry.  The space
    above, between and below boxes becomes full-width 'content' zones, but
    only where that space is at least ``min_zone_height`` tall.

    Args:
        content_x: Left edge of the content area.
        content_y: Top edge of the content area.
        content_w: Width of the content area.
        content_h: Height of the content area.
        boxes: Detected boxes.  They are processed in ascending ``y`` order;
            an already sorted list keeps its order.
        min_zone_height: Minimum height for a content zone to be emitted.

    Returns:
        Zones ordered top to bottom, indexed consecutively from 0.  Without
        boxes a single content zone covering the whole content area.
    """
    def _content_zone(index: int, top: int, height: int) -> PageZone:
        # Content zones always span the full content width.
        return PageZone(
            index=index,
            zone_type='content',
            y=top,
            height=height,
            x=content_x,
            width=content_w,
        )

    if not boxes:
        return [_content_zone(0, content_y, content_h)]

    zones: List[PageZone] = []
    cursor_y = content_y
    content_bottom = content_y + content_h

    # Sort defensively: the gap arithmetic below only makes sense when the
    # boxes are visited from top to bottom.
    for box in sorted(boxes, key=lambda b: b.y):
        gap_above = box.y - cursor_y
        if gap_above >= min_zone_height:
            zones.append(_content_zone(len(zones), cursor_y, gap_above))

        zones.append(PageZone(
            index=len(zones),
            zone_type='box',
            y=box.y,
            height=box.height,
            x=box.x,
            width=box.width,
            box=box,
        ))

        # Never move the cursor upwards: with partly overlapping boxes a
        # later box can end above an earlier one, and a cursor that moved
        # back would make the next content zone overlap an emitted box.
        cursor_y = max(cursor_y, box.y + box.height)

    remaining = content_bottom - cursor_y
    if remaining >= min_zone_height:
        zones.append(_content_zone(len(zones), cursor_y, remaining))

    # Lazy %-formatting, consistent with the other logger calls in this file.
    logger.info("ZoneSplit: %d zones from %d box(es): %s",
                len(zones), len(boxes), [z.zone_type for z in zones])

    return zones
|
||||
1588
klausur-service/backend/cv_cell_grid.py
Normal file
1588
klausur-service/backend/cv_cell_grid.py
Normal file
File diff suppressed because it is too large
Load Diff
312
klausur-service/backend/cv_color_detect.py
Normal file
312
klausur-service/backend/cv_color_detect.py
Normal file
@@ -0,0 +1,312 @@
|
||||
"""
|
||||
Color detection for OCR word boxes.
|
||||
|
||||
Detects the text color of existing OCR words and recovers colored text
|
||||
regions (e.g. red markers, blue headings) that standard OCR may have missed.
|
||||
|
||||
Standard OCR (Tesseract, PaddleOCR) binarises images before processing,
|
||||
destroying all color information. This module adds it back by sampling
|
||||
HSV pixel values at word-box positions and finding colored regions that
|
||||
no word-box covers.
|
||||
|
||||
License: Apache 2.0 (commercial use permitted)
|
||||
PRIVACY: All processing is performed locally.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HSV color ranges (OpenCV: H 0-180, S 0-255, V 0-255)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_COLOR_RANGES: Dict[str, List[Tuple[np.ndarray, np.ndarray]]] = {
|
||||
"red": [
|
||||
(np.array([0, 70, 50]), np.array([10, 255, 255])),
|
||||
(np.array([170, 70, 50]), np.array([180, 255, 255])),
|
||||
],
|
||||
"orange": [
|
||||
(np.array([10, 70, 50]), np.array([25, 255, 255])),
|
||||
],
|
||||
"yellow": [
|
||||
(np.array([25, 70, 50]), np.array([35, 255, 255])),
|
||||
],
|
||||
"green": [
|
||||
(np.array([35, 70, 50]), np.array([85, 255, 255])),
|
||||
],
|
||||
"blue": [
|
||||
(np.array([100, 70, 50]), np.array([130, 255, 255])),
|
||||
],
|
||||
"purple": [
|
||||
(np.array([130, 70, 50]), np.array([170, 255, 255])),
|
||||
],
|
||||
}
|
||||
|
||||
_COLOR_HEX: Dict[str, str] = {
|
||||
"black": "#000000",
|
||||
"gray": "#6b7280",
|
||||
"red": "#dc2626",
|
||||
"orange": "#ea580c",
|
||||
"yellow": "#ca8a04",
|
||||
"green": "#16a34a",
|
||||
"blue": "#2563eb",
|
||||
"purple": "#9333ea",
|
||||
}
|
||||
|
||||
|
||||
def _hue_to_color_name(hue: float) -> str:
|
||||
"""Map OpenCV hue (0-180) to a color name."""
|
||||
if hue < 10 or hue > 170:
|
||||
return "red"
|
||||
if hue < 25:
|
||||
return "orange"
|
||||
if hue < 35:
|
||||
return "yellow"
|
||||
if hue < 85:
|
||||
return "green"
|
||||
if hue < 130:
|
||||
return "blue"
|
||||
return "purple"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Color annotation for existing word boxes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def detect_word_colors(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    sat_threshold: int = 55,
    min_sat_ratio: float = 0.25,
) -> None:
    """Write the detected text color into every word box (in place).

    Each dict in ``word_boxes`` receives a ``color`` (hex string) and a
    ``color_name`` key.  Nothing happens when the image is None or the list
    is empty.

    Per word:
      1. Crop the word rectangle (clipped to the image).
      2. Text mask = Otsu-dark pixels OR pixels above ``sat_threshold``.
      3. For crops larger than 6x6, take the 2-px border ring as background
         sample; if that background is tinted (median S > 15), discard text
         pixels whose hue is within 20 of the background hue, unless that
         would discard all of them.
      4. Classify as colored only when the median saturation reaches
         ``sat_threshold`` and at least ``min_sat_ratio`` of the text pixels
         are saturated; the name comes from the median hue of the saturated
         pixels.  "red" additionally needs a median saturation of 90.

    Args:
        img_bgr: BGR page image.
        word_boxes: Word dicts with ``left``, ``top``, ``width``, ``height``.
        sat_threshold: Saturation above which a pixel counts as colored.
        min_sat_ratio: Minimum share of saturated text pixels.
    """
    if img_bgr is None or not word_boxes:
        return

    def _mark_black(entry: Dict) -> None:
        # Fallback used for every "not colored" outcome.
        entry["color"] = _COLOR_HEX["black"]
        entry["color_name"] = "black"

    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    img_h, img_w = img_bgr.shape[:2]

    colored_count = 0

    for wb in word_boxes:
        left = max(0, int(wb["left"]))
        top = max(0, int(wb["top"]))
        right = min(img_w, int(wb["left"] + wb["width"]))
        bottom = min(img_h, int(wb["top"] + wb["height"]))

        if right <= left or bottom <= top:
            _mark_black(wb)
            continue

        crop_hsv = img_hsv[top:bottom, left:right]
        crop_gray = cv2.cvtColor(img_bgr[top:bottom, left:right], cv2.COLOR_BGR2GRAY)

        # Text mask: Otsu-dark pixels plus strongly saturated pixels.
        _, dark = cv2.threshold(
            crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
        )
        vivid = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
        text_pixels = crop_hsv[cv2.bitwise_or(dark, vivid) > 0]

        if len(text_pixels) < 3:
            _mark_black(wb)
            continue

        # Background sample: 2-px ring around the crop (corners counted once).
        crop_h, crop_w = crop_hsv.shape[:2]
        if crop_h > 6 and crop_w > 6:
            ring = np.vstack([
                crop_hsv[:2, :].reshape(-1, 3),
                crop_hsv[-2:, :].reshape(-1, 3),
                crop_hsv[2:-2, :2].reshape(-1, 3),
                crop_hsv[2:-2, -2:].reshape(-1, 3),
            ])
            bg_hue = float(np.median(ring[:, 0]))
            bg_sat = float(np.median(ring[:, 1]))

            # Tinted background: drop text pixels that share its hue so the
            # background does not leak into the classification.
            if bg_sat > 15:
                delta = np.abs(text_pixels[:, 0].astype(float) - bg_hue)
                hue_distance = np.minimum(delta, 180.0 - delta)  # circular
                differs = hue_distance > 20
                if np.any(differs):
                    text_pixels = text_pixels[differs]

        if len(text_pixels) < 3:
            _mark_black(wb)
            continue

        sat_values = text_pixels[:, 1]
        median_sat = float(np.median(sat_values))
        is_vivid = sat_values > sat_threshold
        sat_ratio = int(np.sum(is_vivid)) / len(text_pixels)

        if median_sat < sat_threshold or sat_ratio < min_sat_ratio:
            _mark_black(wb)
            continue

        # Hue is taken from the saturated pixels only.
        median_hue = float(np.median(text_pixels[is_vivid][:, 0]))
        name = _hue_to_color_name(median_hue)

        # Weakly saturated "red" is treated as black: scanner artifacts on
        # black text tend to show a slight warm tint around hue 0.
        if name == "red" and median_sat < 90:
            _mark_black(wb)
            continue

        wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
        wb["color_name"] = name
        colored_count += 1

    if colored_count:
        logger.info("color annotation: %d / %d words are colored",
                    colored_count, len(word_boxes))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Recover colored text that OCR missed
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def recover_colored_text(
    img_bgr: np.ndarray,
    existing_words: List[Dict],
    min_area: int = 40,
    max_regions: int = 60,
) -> List[Dict]:
    """Locate colored marks that no existing word box covers.

    For every entry of ``_COLOR_RANGES`` an HSV mask is built, the (padded)
    rectangles of the existing words are blanked out, and the remaining
    blobs are turned into word-like dicts.

    Args:
        img_bgr: BGR page image; ``None`` yields an empty list.
        existing_words: Word dicts with ``left``, ``top``, ``width``,
            ``height``.
        min_area: Minimum contour area of a recovered region.
        max_regions: Maximum number of regions kept per color (largest
            first).

    Returns:
        Dicts with ``text`` (from ``_identify_shape``), geometry, ``conf``
        45, ``color``, ``color_name`` and ``recovered=True``, grouped by
        color in ``_COLOR_RANGES`` order.
    """
    if img_bgr is None:
        return []

    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    ih, iw = img_bgr.shape[:2]
    max_area = int(ih * iw * 0.005)

    # Padding scales with the typical word height so that colored fringes in
    # narrow inter-word gaps are not mistaken for characters.
    heights = [wb["height"] for wb in existing_words if wb.get("height", 0) > 0]
    median_h = int(np.median(heights)) if heights else 20
    pad = max(8, int(median_h * 0.35))

    occupied = np.zeros((ih, iw), dtype=np.uint8)
    for wb in existing_words:
        left = max(0, int(wb["left"]) - pad)
        top = max(0, int(wb["top"]) - pad)
        right = min(iw, int(wb["left"] + wb["width"]) + pad)
        bottom = min(ih, int(wb["top"] + wb["height"]) + pad)
        occupied[top:bottom, left:right] = 255
    free_area = cv2.bitwise_not(occupied)

    # Tall closing kernel joins the stroke and dot of "!"; the small opening
    # kernel removes specks.
    kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 8))
    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    max_width = median_h * 4  # wider blobs are not single characters

    recovered: List[Dict] = []

    for color_name, ranges in _COLOR_RANGES.items():
        mask = np.zeros((ih, iw), dtype=np.uint8)
        for lower, upper in ranges:
            mask = cv2.bitwise_or(mask, cv2.inRange(img_hsv, lower, upper))

        # Ignore everything already claimed by an OCR word.
        mask = cv2.bitwise_and(mask, free_area)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open)

        contours, _ = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
        )

        candidates = []
        for contour in contours:
            area = cv2.contourArea(contour)
            if not (min_area <= area <= max_area):
                continue
            bx, by, bw, bh = cv2.boundingRect(contour)
            if bh < 6 or bw > max_width:
                continue
            candidates.append((area, bx, by, bw, bh))

        # Largest regions first (stable sort), capped per color.
        largest_first = sorted(candidates, key=lambda c: c[0], reverse=True)
        color_hex = _COLOR_HEX.get(color_name, "#000000")

        for _area, bx, by, bw, bh in largest_first[:max_regions]:
            recovered.append({
                "text": _identify_shape(bw, bh),
                "left": bx,
                "top": by,
                "width": bw,
                "height": bh,
                "conf": 45,
                "color": color_hex,
                "color_name": color_name,
                "recovered": True,
            })

    if recovered:
        tally: Dict[str, int] = {}
        for entry in recovered:
            tally[entry["color_name"]] = tally.get(entry["color_name"], 0) + 1
        logger.info(
            "color recovery: %d colored regions found (%s)",
            len(recovered),
            ", ".join(f"{c}: {tally[c]}" for c in sorted(tally)),
        )

    return recovered
|
||||
|
||||
|
||||
def _identify_shape(w: int, h: int) -> str:
|
||||
"""Simple shape heuristic for common single-character text markers."""
|
||||
aspect = w / h if h > 0 else 1.0
|
||||
if aspect < 0.55 and h > 10:
|
||||
# Tall, narrow — likely exclamation mark
|
||||
return "!"
|
||||
if 0.6 < aspect < 1.5 and max(w, h) < 25:
|
||||
# Small, roughly square — bullet or dot
|
||||
return "•"
|
||||
return "?"
|
||||
367
klausur-service/backend/cv_graphic_detect.py
Normal file
367
klausur-service/backend/cv_graphic_detect.py
Normal file
@@ -0,0 +1,367 @@
|
||||
"""
|
||||
Graphical element detection for OCR pages.
|
||||
|
||||
Region-based approach:
|
||||
1. Build a color mask (saturation channel — black text is invisible).
|
||||
2. Dilate heavily to merge nearby colored pixels into regions.
|
||||
3. For each region, check overlap with OCR word boxes:
|
||||
- High word overlap → colored text (skip)
|
||||
- Low word overlap → colored graphic / image (keep)
|
||||
4. Separately detect large black-ink illustrations via ink mask.
|
||||
|
||||
Boxes and text colors are handled by cv_box_detect / cv_color_detect.
|
||||
|
||||
License: Apache 2.0 (commercial use permitted)
|
||||
PRIVACY: All processing is performed locally.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ["detect_graphic_elements", "GraphicElement"]
|
||||
|
||||
|
||||
@dataclass
class GraphicElement:
    """One non-text graphical region found on a page.

    Field order is part of the constructor signature and must not change.
    """

    # Bounding rectangle.
    x: int
    y: int
    width: int
    height: int

    area: int

    # Kind of element; the values named in this file are "image" and
    # "illustration".
    shape: str

    # Dominant color name (or 'black') and its hex string.
    color_name: str
    color_hex: str

    confidence: float

    # Optional contour payload; left out of repr() to keep it short.
    contour: Any = field(default=None, repr=False)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Color helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_COLOR_HEX = {
|
||||
"black": "#000000",
|
||||
"gray": "#6b7280",
|
||||
"red": "#dc2626",
|
||||
"orange": "#ea580c",
|
||||
"yellow": "#ca8a04",
|
||||
"green": "#16a34a",
|
||||
"blue": "#2563eb",
|
||||
"purple": "#9333ea",
|
||||
}
|
||||
|
||||
|
||||
def _dominant_color(hsv_roi: np.ndarray, sat_threshold: int = 40) -> tuple:
|
||||
"""Return (color_name, color_hex) for an HSV region."""
|
||||
if hsv_roi.size == 0:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
pixels = hsv_roi.reshape(-1, 3)
|
||||
sat = pixels[:, 1]
|
||||
sat_mask = sat > sat_threshold
|
||||
sat_ratio = np.sum(sat_mask) / len(pixels) if len(pixels) > 0 else 0
|
||||
|
||||
if sat_ratio < 0.15:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
sat_pixels = pixels[sat_mask]
|
||||
if len(sat_pixels) < 3:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
med_hue = float(np.median(sat_pixels[:, 0]))
|
||||
|
||||
if med_hue < 10 or med_hue > 170:
|
||||
name = "red"
|
||||
elif med_hue < 25:
|
||||
name = "orange"
|
||||
elif med_hue < 35:
|
||||
name = "yellow"
|
||||
elif med_hue < 85:
|
||||
name = "green"
|
||||
elif med_hue < 130:
|
||||
name = "blue"
|
||||
else:
|
||||
name = "purple"
|
||||
|
||||
return name, _COLOR_HEX.get(name, _COLOR_HEX["black"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def detect_graphic_elements(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    detected_boxes: Optional[List[Dict]] = None,
    max_elements: int = 50,
) -> List[GraphicElement]:
    """Find non-text graphical regions on the page.

    Region-based: dilate color mask to form regions, then check word
    overlap to distinguish colored text from colored graphics. A second
    pass looks for large dark-ink contours outside of words, detected boxes
    and colored pixels.

    Args:
        img_bgr: BGR color image. ``None`` or an empty array yields ``[]``.
        word_boxes: List of OCR word dicts with left/top/width/height.
            ``None`` is treated as an empty list.
        detected_boxes: Optional list of detected box dicts (x/y/w/h;
            ``width``/``height`` are accepted as fallbacks for w/h).
        max_elements: Maximum number of elements to return.

    Returns:
        List of GraphicElement, sorted by area descending, with candidates
        dropped when they overlap an already accepted (larger-area) element
        by more than 50% of the smaller bounding box.
    """
    # An empty array would make cv2.cvtColor raise; treat it like a missing image.
    if img_bgr is None or img_bgr.size == 0:
        return []

    # Tolerate callers that pass None when OCR produced no words.
    word_boxes = word_boxes or []

    h, w = img_bgr.shape[:2]

    logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
                 w, h, len(word_boxes), len(detected_boxes or []))

    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    candidates: List[GraphicElement] = []

    # --- Build word mask (for overlap checking) ---
    word_mask = np.zeros((h, w), dtype=np.uint8)
    for wb in word_boxes:
        x1 = max(0, int(wb.get("left", 0)))
        y1 = max(0, int(wb.get("top", 0)))
        x2 = min(w, int(wb.get("left", 0) + wb.get("width", 0)))
        y2 = min(h, int(wb.get("top", 0) + wb.get("height", 0)))
        word_mask[y1:y2, x1:x2] = 255

    # Word centroids do not depend on the region being examined, so compute
    # them once instead of once per contour.
    word_centroids = [
        (
            int(wb.get("left", 0) + wb.get("width", 0) / 2),
            int(wb.get("top", 0) + wb.get("height", 0) / 2),
        )
        for wb in word_boxes
    ]

    # =====================================================================
    # PASS 1 — COLORED IMAGE REGIONS
    # =====================================================================
    # Color mask: saturated pixels (black text has sat ≈ 0 → invisible)
    sat_mask = (hsv[:, :, 1] > 40).astype(np.uint8) * 255
    val_mask = (hsv[:, :, 2] < 240).astype(np.uint8) * 255
    color_pixels = cv2.bitwise_and(sat_mask, val_mask)

    # Remove tiny speckle
    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    color_pixels = cv2.morphologyEx(color_pixels, cv2.MORPH_OPEN, kernel_open)

    # Keep the undilated mask: density is measured on real colored pixels.
    color_pixel_raw = color_pixels.copy()

    # Heavy dilation to merge nearby colored elements into regions.
    # A 25x25 kernel merges elements within ~12px of each other.
    kernel_dilate = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25))
    region_mask = cv2.dilate(color_pixels, kernel_dilate, iterations=1)

    contours_regions, _ = cv2.findContours(
        region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))

    for cnt in contours_regions:
        bx, by, bw, bh = cv2.boundingRect(cnt)

        # Skip tiny regions
        if bw < 15 or bh < 15:
            continue

        # Skip page-spanning regions
        if bw > w * 0.5 or bh > h * 0.5:
            logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
            continue

        # bw and bh are both >= 15 here, so the area is always positive.
        bbox_area = bw * bh

        # How much of this region's bounding box overlaps with words?
        roi_words = word_mask[by:by + bh, bx:bx + bw]
        word_overlap = int(np.sum(roi_words > 0)) / bbox_area

        # How many OCR word centroids fall inside this region?
        # Colored text that OCR detected will have multiple centroids inside.
        # Actual images may have 0-1 spurious OCR artifacts.
        word_centroid_count = sum(
            1 for cx, cy in word_centroids
            if bx <= cx <= bx + bw and by <= cy <= by + bh
        )

        # How many actual colored pixels are in this region?
        roi_color = color_pixel_raw[by:by + bh, bx:bx + bw]
        color_pixel_count = int(np.sum(roi_color > 0))

        # Color pixel density (before any skip checks so we can log it)
        density = color_pixel_count / bbox_area

        # --- Skip heuristics for colored TEXT (not images) ---

        # (a) High word-box pixel overlap → clearly text
        if word_overlap > 0.40:
            logger.info(
                "GraphicDetect PASS1 skip text-overlap (%d,%d) %dx%d "
                "overlap=%.0f%% centroids=%d",
                bx, by, bw, bh, word_overlap * 100, word_centroid_count,
            )
            continue

        # (b) Multiple OCR words detected inside → colored text
        # (images rarely produce 2+ confident word detections)
        if word_centroid_count >= 2:
            logger.info(
                "GraphicDetect PASS1 skip multi-word (%d,%d) %dx%d "
                "centroids=%d overlap=%.0f%% density=%.0f%%",
                bx, by, bw, bh, word_centroid_count,
                word_overlap * 100, density * 100,
            )
            continue

        # (c) Even 1 word + some pixel overlap → likely text
        if word_centroid_count >= 1 and word_overlap > 0.10:
            logger.info(
                "GraphicDetect PASS1 skip word+overlap (%d,%d) %dx%d "
                "centroids=%d overlap=%.0f%%",
                bx, by, bw, bh, word_centroid_count, word_overlap * 100,
            )
            continue

        # Need a minimum number of colored pixels (not just dilated area)
        if color_pixel_count < 200:
            continue

        # (d) Very low density → thin strokes, almost certainly text
        if density < 0.20:
            logger.info(
                "GraphicDetect PASS1 skip low-density (%d,%d) %dx%d "
                "density=%.0f%% (likely colored text)",
                bx, by, bw, bh, density * 100,
            )
            continue

        # (e) Moderate density + small height → colored text line
        if density < 0.35 and bh < h * 0.05:
            logger.info(
                "GraphicDetect PASS1 skip text-height (%d,%d) %dx%d "
                "density=%.0f%% height=%.1f%%",
                bx, by, bw, bh, density * 100, 100.0 * bh / h,
            )
            continue

        # Dominant color from the actual colored pixels. At least 200 of
        # them exist at this point, so the boolean mask is never empty.
        roi_hsv = hsv[by:by + bh, bx:bx + bw]
        color_name, color_hex = _dominant_color(roi_hsv[roi_color > 0])

        # Confidence grows with color density, capped at 0.95
        conf = min(0.95, 0.5 + density * 0.5)

        logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d density=%.0f%% overlap=%.0f%% %s",
                     bx, by, bw, bh, color_pixel_count, density * 100, word_overlap * 100, color_name)
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=color_pixel_count,
            shape="image",
            color_name=color_name, color_hex=color_hex,
            confidence=round(conf, 2), contour=cnt,
        ))

    # =====================================================================
    # PASS 2 — LARGE BLACK-INK ILLUSTRATIONS
    # =====================================================================
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, dark_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Exclude words (padded), detected box interiors and colored pixels
    exclusion = np.zeros((h, w), dtype=np.uint8)
    word_pad = 5
    for wb in word_boxes:
        x1 = max(0, int(wb.get("left", 0)) - word_pad)
        y1 = max(0, int(wb.get("top", 0)) - word_pad)
        x2 = min(w, int(wb.get("left", 0) + wb.get("width", 0)) + word_pad)
        y2 = min(h, int(wb.get("top", 0) + wb.get("height", 0)) + word_pad)
        exclusion[y1:y2, x1:x2] = 255

    if detected_boxes:
        # Only the interior is excluded; an 8px rim of each box stays visible.
        inset = 8
        for box in detected_boxes:
            bbx = int(box.get("x", 0))
            bby = int(box.get("y", 0))
            bbw = int(box.get("w", box.get("width", 0)))
            bbh = int(box.get("h", box.get("height", 0)))
            x1 = max(0, bbx + inset)
            y1 = max(0, bby + inset)
            x2 = min(w, bbx + bbw - inset)
            y2 = min(h, bby + bbh - inset)
            if x2 > x1 and y2 > y1:
                exclusion[y1:y2, x1:x2] = 255

    ink_only = cv2.bitwise_and(dark_mask, cv2.bitwise_not(exclusion))
    ink_only = cv2.bitwise_and(ink_only, cv2.bitwise_not(color_pixels))

    contours_ink, _ = cv2.findContours(
        ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink))

    for cnt in contours_ink:
        area = cv2.contourArea(cnt)
        bx, by, bw, bh = cv2.boundingRect(cnt)

        if area < 5000 or min(bw, bh) < 40:
            continue
        if bw > w * 0.8 or bh > h * 0.8:
            continue

        logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d",
                     bx, by, bw, bh, int(area))
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=int(area), shape="illustration",
            color_name="black", color_hex="#000000",
            confidence=0.5, contour=cnt,
        ))

    # =====================================================================
    # Deduplicate and return
    # =====================================================================
    candidates.sort(key=lambda g: g.area, reverse=True)

    final: List[GraphicElement] = []
    for c in candidates:
        overlap = False
        for f in final:
            ix1 = max(c.x, f.x)
            iy1 = max(c.y, f.y)
            ix2 = min(c.x + c.width, f.x + f.width)
            iy2 = min(c.y + c.height, f.y + f.height)
            if ix2 > ix1 and iy2 > iy1:
                inter = (ix2 - ix1) * (iy2 - iy1)
                smaller = min(c.width * c.height, f.width * f.height)
                if smaller > 0 and inter / smaller > 0.5:
                    overlap = True
                    break
        if not overlap:
            final.append(c)

    result = final[:max_elements]

    if result:
        shape_counts: Dict[str, int] = {}
        for g in result:
            shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
        logger.info(
            "GraphicDetect: %d elements found (%s)",
            len(result),
            ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
        )
    else:
        logger.info("GraphicDetect: no graphic elements found")

    return result
|
||||
3314
klausur-service/backend/cv_layout.py
Normal file
3314
klausur-service/backend/cv_layout.py
Normal file
File diff suppressed because it is too large
Load Diff
1996
klausur-service/backend/cv_ocr_engines.py
Normal file
1996
klausur-service/backend/cv_ocr_engines.py
Normal file
File diff suppressed because it is too large
Load Diff
1166
klausur-service/backend/cv_preprocessing.py
Normal file
1166
klausur-service/backend/cv_preprocessing.py
Normal file
File diff suppressed because it is too large
Load Diff
1159
klausur-service/backend/cv_review.py
Normal file
1159
klausur-service/backend/cv_review.py
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
183
klausur-service/backend/cv_vocab_types.py
Normal file
183
klausur-service/backend/cv_vocab_types.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""
|
||||
Shared types, constants, and availability guards for the CV vocabulary pipeline.
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re # noqa: F401 — re-exported for downstream modules
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np # noqa: F401
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# --- Availability Guards ---
# Each optional dependency is probed once at import time. On failure the
# module name is bound to None and a flag records that it is unavailable.

try:
    import cv2  # noqa: F401
except ImportError:
    cv2 = None  # type: ignore[assignment]
    CV2_AVAILABLE = False
    logger.warning("OpenCV not available — CV pipeline disabled")
else:
    CV2_AVAILABLE = True

try:
    import pytesseract  # noqa: F401
    from PIL import Image  # noqa: F401
except ImportError:
    # Either import failing disables both names.
    pytesseract = None  # type: ignore[assignment]
    Image = None  # type: ignore[assignment,misc]
    TESSERACT_AVAILABLE = False
    logger.warning("pytesseract/Pillow not available — CV pipeline disabled")
else:
    TESSERACT_AVAILABLE = True

# The pipeline needs both OpenCV and Tesseract.
CV_PIPELINE_AVAILABLE = CV2_AVAILABLE and TESSERACT_AVAILABLE

# --- IPA Dictionary ---
# IPA_AVAILABLE becomes True as soon as either source below loads.

IPA_AVAILABLE = False
_ipa_convert_american = None
_britfone_dict: Dict[str, str] = {}

try:
    import eng_to_ipa as _eng_to_ipa
except ImportError:
    logger.info("eng_to_ipa not installed — American IPA disabled")
else:
    _ipa_convert_american = _eng_to_ipa.convert
    IPA_AVAILABLE = True
    logger.info("eng_to_ipa available — American IPA lookup enabled")

# Load Britfone dictionary (MIT license, ~15k British English IPA entries)
_britfone_path = os.path.join(os.path.dirname(__file__), 'data', 'britfone_ipa.json')
if not os.path.exists(_britfone_path):
    logger.info("Britfone not found — British IPA disabled")
else:
    try:
        with open(_britfone_path, 'r', encoding='utf-8') as f:
            _britfone_dict = json.load(f)
        IPA_AVAILABLE = True
        logger.info(f"Britfone loaded — {len(_britfone_dict)} British IPA entries")
    except Exception as e:
        # Best effort: a broken dictionary file must not break module import.
        logger.warning(f"Failed to load Britfone: {e}")

# --- Language Detection Constants ---
# Umlauts are written in ASCII transliteration ('fuer', 'ueber').

GERMAN_FUNCTION_WORDS = {
    'der', 'die', 'das', 'und', 'ist', 'ein', 'eine', 'nicht',
    'von', 'zu', 'mit', 'auf', 'fuer', 'den', 'dem', 'sich',
    'auch', 'wird', 'nach', 'bei', 'aus', 'wie', 'oder', 'wenn',
    'noch', 'aber', 'hat', 'nur', 'ueber', 'kann', 'als', 'ich',
    'er', 'sie', 'es', 'wir', 'ihr', 'haben', 'sein', 'werden',
    'war', 'sind', 'muss', 'soll', 'dieser', 'diese', 'diesem',
}

ENGLISH_FUNCTION_WORDS = {
    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'of',
    'and', 'in', 'that', 'it', 'for', 'on', 'with', 'as', 'at',
    'by', 'from', 'or', 'but', 'not', 'be', 'have', 'has', 'had',
    'do', 'does', 'did', 'will', 'would', 'can', 'could', 'should',
    'may', 'might', 'this', 'they', 'you', 'he', 'she', 'we', 'my',
    'your', 'his', 'her', 'its', 'our', 'their', 'which',
}
|
||||
|
||||
|
||||
# --- Data Classes ---
|
||||
|
||||
@dataclass
class PageRegion:
    """A rectangular region detected on the page, tagged with its role."""
    # Role tag. Known values: 'column_en', 'column_de', 'column_example',
    # 'page_ref', 'column_marker', 'column_text', 'header', 'footer',
    # 'margin_top', 'margin_bottom'.
    type: str
    x: int
    y: int
    width: int
    height: int
    # Classifier certainty, 0.0-1.0.
    classification_confidence: float = 1.0
    # How the role was decided: 'content', 'position_enhanced' or
    # 'position_fallback'; empty when not recorded.
    classification_method: str = ""
|
||||
|
||||
|
||||
@dataclass
class ColumnGeometry:
    """Geometrically detected column, before type classification."""
    # 0-based position, left to right.
    index: int
    x: int
    y: int
    width: int
    height: int
    word_count: int
    # Word dicts from Tesseract (text, conf, left, top, ...).
    words: List[Dict]
    # width / content_width (0.0-1.0).
    width_ratio: float
    # True if created by a _detect_sub_columns() split.
    is_sub_column: bool = False
|
||||
|
||||
|
||||
@dataclass
class RowGeometry:
    """Geometrically detected row with header/footer classification."""
    # 0-based position, top to bottom.
    index: int
    # Absolute left edge (= content left_x).
    x: int
    # Absolute y start.
    y: int
    # Content width.
    width: int
    # Row height in px.
    height: int
    word_count: int
    words: List[Dict]
    # One of 'content' | 'header' | 'footer'.
    row_type: str = 'content'
    # Gap in px above this row.
    gap_before: int = 0
|
||||
|
||||
|
||||
@dataclass
class VocabRow:
    """One vocabulary entry put together from multi-column OCR output."""
    # All fields are optional; text fields default to empty strings.
    english: str = ""
    german: str = ""
    example: str = ""
    source_page: str = ""
    confidence: float = 0.0
    y_position: int = 0
|
||||
|
||||
|
||||
@dataclass
class PipelineResult:
    """Full outcome of one CV pipeline run."""
    # Mutable containers use default_factory so instances never share state.
    vocabulary: List[Dict[str, Any]] = field(default_factory=list)
    word_count: int = 0
    columns_detected: int = 0
    duration_seconds: float = 0.0
    stages: Dict[str, float] = field(default_factory=dict)
    # None by default; otherwise an error message string.
    error: Optional[str] = None
    image_width: int = 0
    image_height: int = 0
|
||||
|
||||
|
||||
@dataclass
class DocumentTypeResult:
    """Outcome of the automatic document type detection."""
    # One of 'vocab_table' | 'full_text' | 'generic_table'.
    doc_type: str
    # Detection certainty, 0.0-1.0.
    confidence: float
    # One of 'cell_first' | 'full_page'.
    pipeline: str
    # Step names to skip, e.g. ['columns', 'rows'].
    skip_steps: List[str] = field(default_factory=list)
    # Debug info.
    features: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class DetectedBox:
    """A box embedded in the page (e.g. grammar tip, exercise)."""
    # Absolute pixel position and size.
    x: int
    y: int
    width: int
    height: int
    # Detection certainty, 0.0-1.0.
    confidence: float
    border_thickness: int = 0
|
||||
|
||||
|
||||
@dataclass
class PageZone:
    """A horizontal band of the page: either normal content or a detected box."""
    # 0-based position, top to bottom.
    index: int
    # One of 'content' | 'box'.
    zone_type: str
    # Absolute pixel y. Note the field order: y/height come before x/width.
    y: int
    height: int
    x: int
    width: int
    # Defaults to None; presumably set for 'box' zones (confirm against callers).
    box: Optional[DetectedBox] = None
    columns: List[ColumnGeometry] = field(default_factory=list)
    image_overlays: List[Dict] = field(default_factory=list)
    # 'left_of_vsplit' or 'right_of_vsplit'.
    layout_hint: Optional[str] = None
    # Group ID for side-by-side rendering.
    vsplit_group: Optional[int] = None
|
||||
355
klausur-service/backend/cv_words_first.py
Normal file
355
klausur-service/backend/cv_words_first.py
Normal file
@@ -0,0 +1,355 @@
|
||||
"""
|
||||
Words-First Grid Builder (Bottom-Up).
|
||||
|
||||
Builds a cell grid from Tesseract word_boxes directly, without requiring
|
||||
pre-detected columns or rows. Algorithm:
|
||||
|
||||
1. Cluster words into columns by X-gap analysis
|
||||
2. Cluster words into rows by Y-proximity
|
||||
3. Build cells at (column, row) intersections
|
||||
|
||||
Returns the same (cells, columns_meta) format as build_cell_grid_v2().
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import statistics
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from cv_ocr_engines import (
|
||||
_group_words_into_lines,
|
||||
_words_to_reading_order_text,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Column clustering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _cluster_columns(
|
||||
words: List[Dict],
|
||||
img_w: int,
|
||||
min_gap_pct: float = 3.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Cluster words into columns by finding large horizontal gaps.
|
||||
|
||||
Returns a list of column dicts:
|
||||
[{'index': 0, 'type': 'column_1', 'x_min': ..., 'x_max': ...}, ...]
|
||||
sorted left-to-right.
|
||||
"""
|
||||
if not words:
|
||||
return []
|
||||
|
||||
# Sort by X center
|
||||
sorted_w = sorted(words, key=lambda w: w['left'] + w['width'] / 2)
|
||||
|
||||
# Collect word heights to compute adaptive threshold
|
||||
heights = [w['height'] for w in sorted_w if w.get('height', 0) > 0]
|
||||
median_h = statistics.median(heights) if heights else 30
|
||||
|
||||
# Adaptive gap threshold: 3× median word height, but at least min_gap_pct of image width
|
||||
min_gap_px = max(median_h * 3, img_w * min_gap_pct / 100) if img_w > 0 else median_h * 3
|
||||
|
||||
# Find X-gap boundaries between consecutive words (sorted by X-center)
|
||||
# For each word, compute right edge; for next word, compute left edge
|
||||
boundaries: List[float] = [] # X positions where columns split
|
||||
for i in range(len(sorted_w) - 1):
|
||||
right_edge = sorted_w[i]['left'] + sorted_w[i]['width']
|
||||
left_edge = sorted_w[i + 1]['left']
|
||||
gap = left_edge - right_edge
|
||||
if gap > min_gap_px:
|
||||
# Split point is midway through the gap
|
||||
boundaries.append((right_edge + left_edge) / 2)
|
||||
|
||||
# Build column ranges from boundaries
|
||||
# Column ranges: (-inf, boundary[0]), (boundary[0], boundary[1]), ..., (boundary[-1], +inf)
|
||||
col_edges = [0.0] + boundaries + [float(img_w)]
|
||||
columns = []
|
||||
for ci in range(len(col_edges) - 1):
|
||||
columns.append({
|
||||
'index': ci,
|
||||
'type': f'column_{ci + 1}' if len(col_edges) > 2 else 'column_text',
|
||||
'x_min': col_edges[ci],
|
||||
'x_max': col_edges[ci + 1],
|
||||
})
|
||||
|
||||
return columns
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Row clustering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _cluster_rows(
    words: List[Dict],
) -> List[Dict[str, Any]]:
    """Group words into visual rows by vertical proximity.

    The Y-tolerance handed to _group_words_into_lines() is half the median
    word height, with a floor of 5 px.

    Returns:
        Row dicts in the order the line grouping yields them:
        [{'index': 0, 'y_min': ..., 'y_max': ..., 'y_center': ...}, ...]
        Empty input gives [].
    """
    if not words:
        return []

    positive_heights = [wd['height'] for wd in words if wd.get('height', 0) > 0]
    typical_h = statistics.median(positive_heights) if positive_heights else 20
    tolerance = max(typical_h * 0.5, 5)

    grouped = _group_words_into_lines(words, y_tolerance_px=int(tolerance))

    result = []
    for row_idx, members in enumerate(grouped):
        top = min(wd['top'] for wd in members)
        bottom = max(wd['top'] + wd['height'] for wd in members)
        result.append({
            'index': row_idx,
            'y_min': top,
            'y_max': bottom,
            'y_center': (top + bottom) / 2,
        })
    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. Build cells
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _assign_word_to_column(word: Dict, columns: List[Dict]) -> int:
|
||||
"""Return column index for a word based on its X-center."""
|
||||
x_center = word['left'] + word['width'] / 2
|
||||
for col in columns:
|
||||
if col['x_min'] <= x_center < col['x_max']:
|
||||
return col['index']
|
||||
# Fallback: nearest column
|
||||
return min(columns, key=lambda c: abs((c['x_min'] + c['x_max']) / 2 - x_center))['index']
|
||||
|
||||
|
||||
def _assign_word_to_row(word: Dict, rows: List[Dict]) -> int:
|
||||
"""Return row index for a word based on its Y-center.
|
||||
|
||||
When rows overlap (e.g. due to tall border-ghost characters inflating
|
||||
a row's y_max), prefer the row whose y_center is closest.
|
||||
"""
|
||||
y_center = word['top'] + word['height'] / 2
|
||||
# Find all rows whose y_range contains this word's center
|
||||
matching = [r for r in rows if r['y_min'] <= y_center <= r['y_max']]
|
||||
if matching:
|
||||
return min(matching, key=lambda r: abs(r['y_center'] - y_center))['index']
|
||||
# Fallback: nearest row by Y-center
|
||||
return min(rows, key=lambda r: abs(r['y_center'] - y_center))['index']
|
||||
|
||||
|
||||
def _split_word_into_token_boxes(w: Dict) -> List[Dict[str, Any]]:
    """Split one OCR box into per-token boxes, proportional to token length.

    PaddleOCR returns phrase-level boxes (e.g. "competition [kompa'tifn]"),
    but the overlay slide mechanism expects one box per word. Tokens are
    split at whitespace, before "[" (IPA patterns) and after a leading "!":
        "badge[bxd3]"     -> ["badge", "[bxd3]"]
        "profit['proft]"  -> ["profit", "['proft]"]
        "!Betonung"       -> ["!", "Betonung"]

    A box with zero or one token is returned unchanged (as a one-item list).
    """
    raw_text = w.get('text', '').strip()
    tokens = [
        t for t in re.split(r'\s+|(?=\[)|(?<=!)(?=[A-Za-z\u00c0-\u024f])', raw_text)
        if t
    ]
    if len(tokens) <= 1:
        return [{
            'text': raw_text,
            'left': w['left'],
            'top': w['top'],
            'width': w['width'],
            'height': w['height'],
            'conf': w.get('conf', 0),
        }]

    # At least two non-empty tokens here, so total_chars is always positive.
    total_chars = sum(len(t) for t in tokens)
    # Small gap between tokens (2% of box width per gap)
    gap_px = w['width'] * 0.02
    usable_w = w['width'] - gap_px * (len(tokens) - 1)

    boxes = []
    cursor = w['left']
    for t in tokens:
        token_w = max(1, usable_w * len(t) / total_chars)
        boxes.append({
            'text': t,
            'left': round(cursor),
            'top': w['top'],
            'width': round(token_w),
            'height': w['height'],
            'conf': w.get('conf', 0),
        })
        cursor += token_w + gap_px
    return boxes


def _build_cells(
    words: List[Dict],
    columns: List[Dict],
    rows: List[Dict],
    img_w: int,
    img_h: int,
) -> List[Dict[str, Any]]:
    """Build cell dicts from word assignments to (column, row) pairs.

    Args:
        words: Word dicts with text/left/top/width/height/conf.
        columns: Column dicts as produced by _cluster_columns().
        rows: Row dicts as produced by _cluster_rows().
        img_w: Image width in pixels (for percentage bboxes; 0 gives 0).
        img_h: Image height in pixels (for percentage bboxes; 0 gives 0).

    Returns:
        One cell dict per non-empty (column, row) bucket, ordered by row
        then column. Returns [] when there are no columns or no rows.
    """
    if not columns or not rows:
        return []

    # Bucket words into (col_idx, row_idx)
    buckets: Dict[Tuple[int, int], List[Dict]] = {}
    for w in words:
        ci = _assign_word_to_column(w, columns)
        ri = _assign_word_to_row(w, rows)
        buckets.setdefault((ci, ri), []).append(w)

    cells = []
    for (ci, ri), cell_words in sorted(buckets.items(), key=lambda kv: (kv[0][1], kv[0][0])):
        col = columns[ci]

        # Tight bbox from the actual word positions
        x_min = min(w['left'] for w in cell_words)
        y_min = min(w['top'] for w in cell_words)
        x_max = max(w['left'] + w['width'] for w in cell_words)
        y_max = max(w['top'] + w['height'] for w in cell_words)
        bw = x_max - x_min
        bh = y_max - y_min

        # One tolerance for both the cell text and the word-box ordering, so
        # both use the same line grouping.
        line_tol = max(10, int(bh * 0.4))

        # Text from words in reading order
        text = _words_to_reading_order_text(cell_words, y_tolerance_px=line_tol)

        # Average confidence over words reporting a positive confidence
        confs = [w.get('conf', 0) for w in cell_words if w.get('conf', 0) > 0]
        avg_conf = sum(confs) / len(confs) if confs else 0.0

        # Word boxes with absolute pixel coordinates (consistent with
        # cv_cell_grid.py), in reading order: group by visual line, then by X.
        # A simple (top, left) sort fails when words on the same line have
        # slightly different top values (1-6px), causing wrong word order.
        reading_lines = _group_words_into_lines(cell_words, y_tolerance_px=line_tol)
        word_boxes = [
            box
            for line in reading_lines
            for w in line
            for box in _split_word_into_token_boxes(w)
        ]

        cells.append({
            'cell_id': f"R{ri:02d}_C{ci}",
            'row_index': ri,
            'col_index': ci,
            'col_type': col['type'],
            'text': text,
            'confidence': round(avg_conf, 1),
            'bbox_px': {'x': x_min, 'y': y_min, 'w': bw, 'h': bh},
            'bbox_pct': {
                'x': round(x_min / img_w * 100, 2) if img_w else 0,
                'y': round(y_min / img_h * 100, 2) if img_h else 0,
                'w': round(bw / img_w * 100, 2) if img_w else 0,
                'h': round(bh / img_h * 100, 2) if img_h else 0,
            },
            'word_boxes': word_boxes,
            'ocr_engine': 'words_first',
            'is_bold': False,
        })

    return cells
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4. Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_grid_from_words(
    word_dicts: List[Dict],
    img_w: int,
    img_h: int,
    min_confidence: int = 30,
    box_rects: Optional[List[Dict]] = None,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Assemble a cell grid bottom-up from Tesseract word boxes.

    Args:
        word_dicts: Flat list of word dicts with keys
            text, left, top, width, height, conf
            (absolute pixel coordinates).
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        min_confidence: Words with a lower OCR confidence are dropped.
        box_rects: Optional list of box dicts with keys x, y, width, height.
            Words whose center lies inside one of these boxes are left out
            (box-internal columns are detected separately in sub-sessions).

    Returns:
        (cells, columns_meta) — same format as build_cell_grid_v2().
        cells: list of cell dicts with cell_id, bbox_px, bbox_pct, etc.
        columns_meta: list of {'index', 'type', 'x', 'width'} dicts.
        Both lists are empty when no usable word remains.
    """
    if not word_dicts:
        logger.info("build_grid_from_words: no words — returning empty grid")
        return [], []

    # Keep only confident words that carry non-blank text.
    words = [
        wd for wd in word_dicts
        if wd.get('conf', 0) >= min_confidence and wd.get('text', '').strip()
    ]
    if not words:
        logger.info("build_grid_from_words: all words filtered (conf < %d)", min_confidence)
        return [], []

    logger.info("build_grid_from_words: %d words (after confidence filter from %d)", len(words), len(word_dicts))

    # Drop words whose center lies inside a detected box — box columns are
    # detected separately.
    if box_rects:
        def _center_in_any_box(wd: Dict) -> bool:
            cx = wd['left'] + wd['width'] / 2
            cy = wd['top'] + wd['height'] / 2
            return any(
                b['x'] <= cx <= b['x'] + b['width']
                and b['y'] <= cy <= b['y'] + b['height']
                for b in box_rects
            )

        outside = [wd for wd in words if not _center_in_any_box(wd)]
        excluded = len(words) - len(outside)
        if excluded:
            logger.info("build_grid_from_words: excluded %d words inside %d box(es)",
                        excluded, len(box_rects))
        words = outside
        if not words:
            logger.info("build_grid_from_words: all words inside boxes — returning empty grid")
            return [], []

    # Step 1: cluster columns
    columns = _cluster_columns(words, img_w)
    logger.info("build_grid_from_words: %d column(s) detected", len(columns))

    # Step 2: cluster rows
    rows = _cluster_rows(words)
    logger.info("build_grid_from_words: %d row(s) detected", len(rows))

    # Step 3: build cells
    cells = _build_cells(words, columns, rows, img_w, img_h)
    logger.info("build_grid_from_words: %d cells built", len(cells))

    # Column metadata in the same format as build_cell_grid_v2
    columns_meta = [
        {
            'index': col['index'],
            'type': col['type'],
            'x': int(col['x_min']),
            'width': int(col['x_max'] - col['x_min']),
        }
        for col in columns
    ]

    return cells, columns_meta
|
||||
1
klausur-service/backend/data/britfone_ipa.json
Normal file
1
klausur-service/backend/data/britfone_ipa.json
Normal file
File diff suppressed because one or more lines are too long
2492
klausur-service/backend/grid_editor_api.py
Normal file
2492
klausur-service/backend/grid_editor_api.py
Normal file
File diff suppressed because it is too large
Load Diff
276
klausur-service/backend/handwriting_htr_api.py
Normal file
276
klausur-service/backend/handwriting_htr_api.py
Normal file
@@ -0,0 +1,276 @@
|
||||
"""
|
||||
Handwriting HTR API - Hochwertige Handschriftenerkennung (HTR) fuer Klausurkorrekturen.
|
||||
|
||||
Endpoints:
|
||||
- POST /api/v1/htr/recognize - Bild hochladen → handgeschriebener Text
|
||||
- POST /api/v1/htr/recognize-session - OCR-Pipeline Session als Quelle nutzen
|
||||
|
||||
Modell-Strategie:
|
||||
1. qwen2.5vl:32b via Ollama (primaer, hoechste Qualitaet als VLM)
|
||||
2. microsoft/trocr-large-handwritten (Fallback, offline, kein Ollama)
|
||||
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal auf dem Mac Mini.
|
||||
"""
|
||||
|
||||
import io
|
||||
import os
|
||||
import logging
|
||||
import time
|
||||
import base64
|
||||
from typing import Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, HTTPException, Query, UploadFile, File
|
||||
from pydantic import BaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)

# All HTR endpoints are mounted under /api/v1/htr.
router = APIRouter(prefix="/api/v1/htr", tags=["HTR"])

# Base URL of the Ollama server (overridable via environment).
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
# Ollama model tag sent in the "model" field of generate requests.
OLLAMA_HTR_MODEL = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")
# Name of the fallback model.
# NOTE(review): not referenced by the functions visible in this module — confirm it is still needed.
HTR_FALLBACK_MODEL = os.getenv("HTR_FALLBACK_MODEL", "trocr-large")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pydantic Models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class HTRSessionRequest(BaseModel):
    """Request body for the session-based HTR endpoint."""

    # ID of the OCR-pipeline session whose stored image is used as input.
    session_id: str
    # Backend selection: "auto" | "qwen2.5vl" | "trocr-large".
    model: str = "auto"
    # Prefer the "clean" session image (after handwriting removal) when available.
    use_clean: bool = True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Preprocessing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _preprocess_for_htr(img_bgr: np.ndarray) -> np.ndarray:
    """
    Apply CLAHE contrast enhancement and upscale small images for HTR.

    Args:
        img_bgr: Image converted with ``cv2.COLOR_BGR2GRAY``, i.e. expected
            to be a BGR array as produced by ``cv2.imdecode``.

    Returns:
        The enhanced grayscale image. If its shorter side is below 800
        pixels it is scaled up (bicubic) so the shorter side reaches 800.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(gray)

    # Upscale if the image is too small. The lower bound avoids a
    # ZeroDivisionError should a degenerate (zero-sized) array ever arrive.
    h, w = enhanced.shape
    shorter_side = min(h, w)
    if 0 < shorter_side < 800:
        scale = 800 / shorter_side
        enhanced = cv2.resize(
            enhanced,
            None,
            fx=scale,
            fy=scale,
            interpolation=cv2.INTER_CUBIC,
        )

    return enhanced
|
||||
|
||||
|
||||
def _bgr_to_png_bytes(img_bgr: np.ndarray) -> bytes:
    """
    Encode an image array as PNG.

    Args:
        img_bgr: Image array handed to ``cv2.imencode``.

    Returns:
        The PNG-encoded image as raw bytes.

    Raises:
        RuntimeError: If ``cv2.imencode`` reports failure.
    """
    success, buf = cv2.imencode(".png", img_bgr)
    if not success:
        raise RuntimeError("Failed to encode image to PNG")
    return buf.tobytes()
|
||||
|
||||
|
||||
def _preprocess_image_bytes(image_bytes: bytes) -> bytes:
    """
    Decode an encoded image, run HTR preprocessing, and re-encode it as PNG.

    Args:
        image_bytes: Encoded image data in any format ``cv2.imdecode`` accepts.

    Returns:
        PNG bytes of the preprocessed image.

    Raises:
        ValueError: If the input cannot be decoded as an image.
    """
    arr = np.frombuffer(image_bytes, dtype=np.uint8)
    img_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise ValueError("Could not decode image")

    enhanced = _preprocess_for_htr(img_bgr)
    # The preprocessing result is grayscale; convert back to 3-channel BGR
    # before encoding.
    enhanced_bgr = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)
    return _bgr_to_png_bytes(enhanced_bgr)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backend: Ollama qwen2.5vl
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _recognize_with_qwen_vl(image_bytes: bytes, language: str) -> Optional[str]:
    """
    Send an image to the Ollama ``/api/generate`` endpoint for HTR.

    Args:
        image_bytes: Encoded image; sent base64-encoded in the ``images`` field.
        language: Language code ("de", "en" or "de+en") used to build the
            prompt hint. Unknown codes fall back to the German hint.

    Returns:
        The stripped ``response`` field of the JSON reply, or ``None`` if the
        request fails for any reason (network error, HTTP error status,
        malformed reply). A reply without a ``response`` field yields an
        empty string, which callers treat as a successful (empty) result.
    """
    import httpx

    lang_hint = {
        "de": "Deutsch",
        "en": "Englisch",
        "de+en": "Deutsch und Englisch",
    }.get(language, "Deutsch")

    # The prompt is intentionally in German; it instructs the model to
    # transcribe verbatim without fixing spelling or adding explanations.
    prompt = (
        f"Du bist ein OCR-Experte fuer handgeschriebenen Text auf {lang_hint}. "
        "Lies den Text im Bild exakt ab — korrigiere KEINE Rechtschreibfehler. "
        "Antworte NUR mit dem erkannten Text, ohne Erklaerungen."
    )

    img_b64 = base64.b64encode(image_bytes).decode("utf-8")

    payload = {
        "model": OLLAMA_HTR_MODEL,
        "prompt": prompt,
        "images": [img_b64],
        "stream": False,
    }

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(f"{OLLAMA_BASE_URL}/api/generate", json=payload)
            resp.raise_for_status()
            data = resp.json()
            return data.get("response", "").strip()
    except Exception as e:
        # Best effort: any failure is logged and reported as None so the
        # caller can try another backend.
        logger.warning("Ollama qwen2.5vl HTR failed: %s", e)
        return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backend: TrOCR-large fallback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _recognize_with_trocr_large(image_bytes: bytes) -> Optional[str]:
    """
    Run handwriting recognition through ``services.trocr_service``.

    Args:
        image_bytes: Encoded image passed on to ``run_trocr_ocr``.

    Returns:
        The stripped recognized text, or ``None`` when TrOCR is unavailable,
        returns no text, or raises any exception (including import failure).
    """
    try:
        from services.trocr_service import run_trocr_ocr, _check_trocr_available

        if not _check_trocr_available():
            logger.warning("TrOCR not available for HTR fallback")
            return None

        # The confidence value returned alongside the text is not used here.
        text, _confidence = await run_trocr_ocr(image_bytes, handwritten=True, size="large")
        return text.strip() if text else None
    except Exception as e:
        logger.warning("TrOCR-large HTR failed: %s", e)
        return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core recognition logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _do_recognize(
    image_bytes: bytes,
    model: str = "auto",
    preprocess: bool = True,
    language: str = "de",
) -> dict:
    """
    Core HTR logic: preprocess, try Ollama, then fall back to TrOCR-large.

    Args:
        image_bytes: Encoded input image.
        model: "auto" or "qwen2.5vl" try Ollama first and fall back to
            TrOCR-large if it fails; "trocr-large" uses TrOCR only. Any other
            value runs no backend at all.
        preprocess: Whether to attempt CLAHE/upscale preprocessing. If
            preprocessing raises, the raw image is used instead.
        language: Language code forwarded to the Ollama prompt.

    Returns:
        Dict with the keys ``text`` (empty string if nothing was recognized),
        ``model_used``, ``processing_time_ms``, ``language`` and
        ``preprocessed`` (True only if preprocessing was actually applied).
    """
    t0 = time.monotonic()

    # Track whether preprocessing really happened so the response does not
    # claim "preprocessed" when the raw image was used after a failure.
    preprocessed = False
    if preprocess:
        try:
            image_bytes = _preprocess_image_bytes(image_bytes)
            preprocessed = True
        except Exception as e:
            logger.warning("HTR preprocessing failed, using raw image: %s", e)

    text: Optional[str] = None
    model_used: str = "none"

    use_qwen = model in ("auto", "qwen2.5vl")
    # TrOCR runs either as the explicitly requested backend or as the
    # fallback for a failed Ollama attempt.
    use_trocr = use_qwen or model == "trocr-large"

    if use_qwen:
        text = await _recognize_with_qwen_vl(image_bytes, language)
        if text is not None:
            model_used = f"qwen2.5vl ({OLLAMA_HTR_MODEL})"

    if text is None and use_trocr:
        text = await _recognize_with_trocr_large(image_bytes)
        if text is not None:
            model_used = "trocr-large-handwritten"

    if text is None:
        text = ""
        model_used = "none (all backends failed)"

    elapsed_ms = int((time.monotonic() - t0) * 1000)

    return {
        "text": text,
        "model_used": model_used,
        "processing_time_ms": elapsed_ms,
        "language": language,
        "preprocessed": preprocessed,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/recognize")
async def recognize_handwriting(
    file: UploadFile = File(...),
    model: str = Query("auto", description="auto | qwen2.5vl | trocr-large"),
    preprocess: bool = Query(True, description="Apply CLAHE + upscale before recognition"),
    language: str = Query("de", description="de | en | de+en"),
):
    """
    Upload an image and get back the recognized handwritten text.

    With the default model "auto", qwen2.5vl via Ollama is tried first and
    TrOCR-large-handwritten is used as the fallback.

    Raises:
        HTTPException: 400 if ``model`` or ``language`` is not one of the
            supported values, or if the uploaded file is empty.
    """
    if model not in ("auto", "qwen2.5vl", "trocr-large"):
        raise HTTPException(status_code=400, detail="model must be one of: auto, qwen2.5vl, trocr-large")
    if language not in ("de", "en", "de+en"):
        raise HTTPException(status_code=400, detail="language must be one of: de, en, de+en")

    image_bytes = await file.read()
    if not image_bytes:
        raise HTTPException(status_code=400, detail="Empty file")

    return await _do_recognize(image_bytes, model=model, preprocess=preprocess, language=language)
|
||||
|
||||
|
||||
@router.post("/recognize-session")
async def recognize_from_session(req: HTRSessionRequest):
    """
    Use an OCR-pipeline session as the image source for HTR.

    Image selection order: "clean" (only when ``use_clean`` is true), then
    "deskewed", then "original"; the first non-empty image is used.

    Returns:
        The recognition result dict, extended with ``session_id`` and
        ``source_image`` (the name of the image that was used).

    Raises:
        HTTPException: 400 if ``model`` is not a supported value; 404 if the
            session does not exist or holds no usable image.
    """
    from ocr_pipeline_session_store import get_session_db, get_session_image

    # Validate the model like the upload endpoint does; otherwise an unknown
    # value would silently run no backend and report "all backends failed".
    if req.model not in ("auto", "qwen2.5vl", "trocr-large"):
        raise HTTPException(status_code=400, detail="model must be one of: auto, qwen2.5vl, trocr-large")

    session = await get_session_db(req.session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {req.session_id} not found")

    # Choose the source image: first candidate that yields data wins.
    candidates = ["deskewed", "original"]
    if req.use_clean:
        candidates.insert(0, "clean")

    image_bytes: Optional[bytes] = None
    source_used: str = ""
    for candidate in candidates:
        image_bytes = await get_session_image(req.session_id, candidate)
        if image_bytes:
            source_used = candidate
            break

    if not image_bytes:
        raise HTTPException(status_code=404, detail="No image available in session")

    result = await _do_recognize(image_bytes, model=req.model)
    result["session_id"] = req.session_id
    result["source_image"] = source_used
    return result
|
||||
@@ -42,6 +42,14 @@ try:
|
||||
except ImportError:
|
||||
trocr_router = None
|
||||
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
|
||||
from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeline_cache
|
||||
from grid_editor_api import router as grid_editor_router
|
||||
from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
|
||||
from ocr_pipeline_session_store import init_ocr_pipeline_tables
|
||||
try:
|
||||
from handwriting_htr_api import router as htr_router
|
||||
except ImportError:
|
||||
htr_router = None
|
||||
try:
|
||||
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
|
||||
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
|
||||
@@ -75,6 +83,13 @@ async def lifespan(app: FastAPI):
|
||||
except Exception as e:
|
||||
print(f"Warning: Vocab sessions database initialization failed: {e}")
|
||||
|
||||
# Initialize OCR Pipeline session tables
|
||||
try:
|
||||
await init_ocr_pipeline_tables()
|
||||
print("OCR Pipeline session tables initialized")
|
||||
except Exception as e:
|
||||
print(f"Warning: OCR Pipeline tables initialization failed: {e}")
|
||||
|
||||
# Initialize database pool for DSFA RAG
|
||||
dsfa_db_pool = None
|
||||
if DSFA_DATABASE_URL and set_dsfa_db_pool:
|
||||
@@ -104,6 +119,19 @@ async def lifespan(app: FastAPI):
|
||||
# Ensure EH upload directory exists
|
||||
os.makedirs(EH_UPLOAD_DIR, exist_ok=True)
|
||||
|
||||
# Preload LightOnOCR model if OCR_ENGINE=lighton (avoids cold-start on first request)
|
||||
ocr_engine_env = os.getenv("OCR_ENGINE", "auto")
|
||||
if ocr_engine_env == "lighton":
|
||||
try:
|
||||
import asyncio
|
||||
from services.lighton_ocr_service import get_lighton_model
|
||||
loop = asyncio.get_event_loop()
|
||||
print("Preloading LightOnOCR-2-1B at startup (OCR_ENGINE=lighton)...")
|
||||
await loop.run_in_executor(None, get_lighton_model)
|
||||
print("LightOnOCR-2-1B preloaded")
|
||||
except Exception as e:
|
||||
print(f"Warning: LightOnOCR preload failed: {e}")
|
||||
|
||||
yield
|
||||
|
||||
print("Klausur-Service shutting down...")
|
||||
@@ -150,6 +178,12 @@ app.include_router(mail_router) # Unified Inbox Mail
|
||||
if trocr_router:
|
||||
app.include_router(trocr_router) # TrOCR Handwriting OCR
|
||||
app.include_router(vocab_router) # Vocabulary Worksheet Generator
|
||||
app.include_router(ocr_pipeline_router) # OCR Pipeline (step-by-step)
|
||||
app.include_router(grid_editor_router) # Grid Editor (Excel-like)
|
||||
set_orientation_crop_cache(ocr_pipeline_cache)
|
||||
app.include_router(orientation_crop_router) # OCR Pipeline: Orientation + Crop
|
||||
if htr_router:
|
||||
app.include_router(htr_router) # Handwriting HTR (Klausur)
|
||||
if dsfa_rag_router:
|
||||
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search
|
||||
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
-- OCR Pipeline Sessions - Persistent session storage
-- Applied automatically by ocr_pipeline_session_store.init_ocr_pipeline_tables()

CREATE TABLE IF NOT EXISTS ocr_pipeline_sessions (
    id UUID PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    filename VARCHAR(255),
    status VARCHAR(50) DEFAULT 'active',
    current_step INT DEFAULT 1,
    -- Image blobs, one column per processing stage
    original_png BYTEA,
    deskewed_png BYTEA,
    binarized_png BYTEA,
    dewarped_png BYTEA,
    -- Per-step results stored as JSON
    deskew_result JSONB,
    dewarp_result JSONB,
    column_result JSONB,
    ground_truth JSONB DEFAULT '{}',
    auto_shear_degrees FLOAT,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Index for listing sessions
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_created
    ON ocr_pipeline_sessions (created_at DESC);

-- Index for filtering sessions by status
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_status
    ON ocr_pipeline_sessions (status);
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Migration 003: Add row_result column for row geometry detection
-- Stores detected row geometries including header/footer classification
-- Idempotent: IF NOT EXISTS makes re-running the migration a no-op.

ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS row_result JSONB;
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Migration 004: Add word_result column for OCR Pipeline Step 5
-- Stores the word recognition grid result (entries with english/german/example + bboxes)
-- Idempotent: IF NOT EXISTS makes re-running the migration a no-op.

ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS word_result JSONB;
|
||||
7
klausur-service/backend/migrations/005_add_doc_type.sql
Normal file
7
klausur-service/backend/migrations/005_add_doc_type.sql
Normal file
@@ -0,0 +1,7 @@
|
||||
-- Migration 005: Add document type detection columns
-- These columns store the result of automatic document type detection
-- (vocab_table, full_text, generic_table) after dewarp.
-- Idempotent: IF NOT EXISTS makes re-running the migration a no-op.

ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS doc_type VARCHAR(50),
    ADD COLUMN IF NOT EXISTS doc_type_result JSONB;
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user