fix: alle Post-Crop-Schritte nutzen cropped statt dewarped Bild
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 24s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 24s
Spalten-, Zeilen-, Woerter-Overlay und alle nachfolgenden Steps (LLM-Review, Rekonstruktion) lesen jetzt image/cropped mit Fallback auf image/dewarped. Tests fuer page_crop.py hinzugefuegt (25 Tests). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -101,7 +101,7 @@ export function FabricReconstructionCanvas({
|
||||
if (!canvasEl) return
|
||||
|
||||
// Load background image first to get dimensions
|
||||
const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||
const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
|
||||
const bgImg = await fabricModule.FabricImage.fromURL(imgUrl, { crossOrigin: 'anonymous' }) as FabricImage
|
||||
|
||||
|
||||
@@ -192,7 +192,7 @@ export function StepColumnDetection({ sessionId, onNext }: StepColumnDetectionPr
|
||||
)
|
||||
}
|
||||
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/columns-overlay`
|
||||
|
||||
// Pre-compute editor state from saved GT or auto columns for GT mode
|
||||
|
||||
@@ -320,7 +320,7 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
||||
}
|
||||
|
||||
const dewarpedUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
|
||||
if (!sessionId) {
|
||||
|
||||
@@ -276,7 +276,7 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
}, [])
|
||||
|
||||
const dewarpedUrl = sessionId
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
: ''
|
||||
|
||||
const colTypeColor = (colType: string): string => {
|
||||
|
||||
@@ -95,7 +95,7 @@ export function StepRowDetection({ sessionId, onNext }: StepRowDetectionProps) {
|
||||
}
|
||||
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/rows-overlay`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
|
||||
const rowTypeColors: Record<string, string> = {
|
||||
header: 'bg-gray-200 dark:bg-gray-600 text-gray-700 dark:text-gray-300',
|
||||
|
||||
@@ -334,7 +334,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
}
|
||||
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||
|
||||
const confColor = (conf: number) => {
|
||||
if (conf >= 70) return 'text-green-600 dark:text-green-400'
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# OCR Pipeline - Schrittweise Seitenrekonstruktion
|
||||
|
||||
**Version:** 4.0.0
|
||||
**Version:** 4.1.0
|
||||
**Status:** Produktiv (Schritte 1–10 implementiert)
|
||||
**URL:** https://macmini:3002/ai/ocr-pipeline
|
||||
|
||||
@@ -17,9 +17,9 @@ Jeder Schritt kann individuell geprueft, korrigiert und mit Ground-Truth-Daten v
|
||||
| Schritt | Name | Beschreibung | Status |
|
||||
|---------|------|--------------|--------|
|
||||
| 1 | Orientierung | 90/180/270° Drehungen von Scannern korrigieren | Implementiert |
|
||||
| 2 | Zuschneiden (Crop) | Scannerraender entfernen, Papierformat (A4) erkennen | Implementiert |
|
||||
| 3 | Begradigung (Deskew) | Scan begradigen (Hough Lines + Word Alignment) | Implementiert |
|
||||
| 4 | Entzerrung (Dewarp) | Buchwoelbung entzerren (Vertikalkanten-Analyse) | Implementiert |
|
||||
| 2 | Begradigung (Deskew) | Scan begradigen (Hough Lines + Word Alignment) | Implementiert |
|
||||
| 3 | Entzerrung (Dewarp) | Buchwoelbung entzerren (Vertikalkanten-Analyse) | Implementiert |
|
||||
| 4 | Zuschneiden (Crop) | Content-basierter Crop: Buchruecken-Schatten + Ink-Projektion | Implementiert |
|
||||
| 5 | Spaltenerkennung | Unsichtbare Spalten finden (Projektionsprofile + Wortvalidierung) | Implementiert |
|
||||
| 6 | Zeilenerkennung | Horizontale Zeilen + Kopf-/Fusszeilen-Klassifikation + Luecken-Heilung | Implementiert |
|
||||
| 7 | Worterkennung | Hybrid-Grid: Breite Spalten full-page, schmale cell-crop | Implementiert |
|
||||
@@ -27,6 +27,11 @@ Jeder Schritt kann individuell geprueft, korrigiert und mit Ground-Truth-Daten v
|
||||
| 9 | Rekonstruktion | Interaktive Zellenbearbeitung auf Bildhintergrund (Fabric.js) | Implementiert |
|
||||
| 10 | Validierung | Ground-Truth-Vergleich und Qualitaetspruefung | Implementiert |
|
||||
|
||||
!!! note "Reihenfolge-Aenderung (v4.1)"
|
||||
Crop wurde hinter Deskew/Dewarp verschoben. Das Bild ist dann bereits gerade,
|
||||
was den Content-basierten Crop deutlich zuverlaessiger macht — insbesondere
|
||||
bei Buchscans mit Ruecken-Schatten und weissem Scanner-Hintergrund.
|
||||
|
||||
---
|
||||
|
||||
## Dokumenttyp-Erkennung und Pipeline-Pfade
|
||||
@@ -34,7 +39,7 @@ Jeder Schritt kann individuell geprueft, korrigiert und mit Ground-Truth-Daten v
|
||||
### Automatische Weiche: `detect_document_type()`
|
||||
|
||||
Nicht jedes Dokument durchlaeuft denselben Pfad. Nach den gemeinsamen Vorverarbeitungsschritten
|
||||
(Deskew, Dewarp, Binarisierung) analysiert `detect_document_type()` die Seitenstruktur
|
||||
(Orientierung, Deskew, Dewarp, Crop) analysiert `detect_document_type()` die Seitenstruktur
|
||||
**ohne OCR** — rein ueber Projektionsprofile und Textdichte-Analyse (< 2 Sekunden).
|
||||
|
||||
```
|
||||
@@ -69,10 +74,10 @@ flowchart TD
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ GEMEINSAME VORVERARBEITUNG (alle Dokumente) │
|
||||
│ │
|
||||
│ Stage 1: Render (432 DPI, 3× Zoom) │
|
||||
│ Stage 2: Deskew (Hough Lines + Ensemble) │
|
||||
│ Stage 3: Dewarp (Vertikalkanten-Drift, Ensemble Shear) │
|
||||
│ Stage 4: Dual-Bild (ocr_img = binarisiert, layout_img = CLAHE) │
|
||||
│ Schritt 1: Orientierung (90/180/270° Drehung korrigieren) │
|
||||
│ Schritt 2: Deskew (Hough Lines + Iterative Projektion + Ensemble) │
|
||||
│ Schritt 3: Dewarp (Vertikalkanten-Drift, Ensemble Shear) │
|
||||
│ Schritt 4: Crop (Content-basiert: Schatten + Ink-Projektion) │
|
||||
└─────────────────────────────────────┬───────────────────────────────┘
|
||||
│
|
||||
detect_document_type()
|
||||
@@ -103,9 +108,9 @@ flowchart TD
|
||||
Post-Processing Pipeline
|
||||
(Lautschrift, Komma-Split, etc.)
|
||||
│
|
||||
Schritt 6: Korrektur (Spell)
|
||||
Schritt 7: Rekonstruktion
|
||||
Schritt 8: Validierung
|
||||
Schritt 8: Korrektur (Spell)
|
||||
Schritt 9: Rekonstruktion
|
||||
Schritt 10: Validierung
|
||||
```
|
||||
|
||||
---
|
||||
@@ -140,7 +145,9 @@ Admin-Lehrer (Next.js) klausur-service (FastAPI :8086)
|
||||
klausur-service/backend/
|
||||
├── services/
|
||||
│ └── cv_vocab_pipeline.py # Computer Vision + NLP Algorithmen
|
||||
├── ocr_pipeline_api.py # FastAPI Router (alle Endpoints)
|
||||
├── ocr_pipeline_api.py # FastAPI Router (Schritte 2-10)
|
||||
├── orientation_crop_api.py # FastAPI Router (Schritte 1 + 4)
|
||||
├── page_crop.py # Content-basierter Crop-Algorithmus
|
||||
├── ocr_pipeline_session_store.py # PostgreSQL Persistence
|
||||
├── layout_reconstruction_service.py # Fabric.js JSON + PDF/DOCX Export
|
||||
└── migrations/
|
||||
@@ -154,15 +161,17 @@ admin-lehrer/
|
||||
│ └── types.ts # TypeScript Interfaces
|
||||
└── components/ocr-pipeline/
|
||||
├── PipelineStepper.tsx # Fortschritts-Stepper
|
||||
├── StepDeskew.tsx # Schritt 1: Begradigung
|
||||
├── StepDewarp.tsx # Schritt 2: Entzerrung
|
||||
├── StepColumnDetection.tsx # Schritt 3: Spaltenerkennung
|
||||
├── StepRowDetection.tsx # Schritt 4: Zeilenerkennung
|
||||
├── StepWordRecognition.tsx # Schritt 5: Worterkennung
|
||||
├── StepLlmReview.tsx # Schritt 6: Korrektur (SSE-Stream)
|
||||
├── StepReconstruction.tsx # Schritt 7: Rekonstruktion (Canvas)
|
||||
├── StepOrientation.tsx # Schritt 1: Orientierung
|
||||
├── StepDeskew.tsx # Schritt 2: Begradigung
|
||||
├── StepDewarp.tsx # Schritt 3: Entzerrung
|
||||
├── StepCrop.tsx # Schritt 4: Zuschneiden
|
||||
├── StepColumnDetection.tsx # Schritt 5: Spaltenerkennung
|
||||
├── StepRowDetection.tsx # Schritt 6: Zeilenerkennung
|
||||
├── StepWordRecognition.tsx # Schritt 7: Worterkennung
|
||||
├── StepLlmReview.tsx # Schritt 8: Korrektur (SSE-Stream)
|
||||
├── StepReconstruction.tsx # Schritt 9: Rekonstruktion (Canvas)
|
||||
├── FabricReconstructionCanvas.tsx # Fabric.js Editor
|
||||
└── StepGroundTruth.tsx # Schritt 8: Validierung
|
||||
└── StepGroundTruth.tsx # Schritt 10: Validierung
|
||||
```
|
||||
|
||||
---
|
||||
@@ -187,14 +196,22 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `GET` | `/sessions/{id}/image/original` | Originalbild |
|
||||
| `GET` | `/sessions/{id}/image/oriented` | Orientiertes Bild |
|
||||
| `GET` | `/sessions/{id}/image/deskewed` | Begradigtes Bild |
|
||||
| `GET` | `/sessions/{id}/image/dewarped` | Entzerrtes Bild |
|
||||
| `GET` | `/sessions/{id}/image/cropped` | Zugeschnittenes Bild |
|
||||
| `GET` | `/sessions/{id}/image/binarized` | Binarisiertes Bild |
|
||||
| `GET` | `/sessions/{id}/image/columns-overlay` | Spalten-Overlay |
|
||||
| `GET` | `/sessions/{id}/image/rows-overlay` | Zeilen-Overlay |
|
||||
| `GET` | `/sessions/{id}/image/words-overlay` | Wort-Grid-Overlay |
|
||||
|
||||
### Schritt 1: Begradigung
|
||||
### Schritt 1: Orientierung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/orientation` | 90/180/270° Drehung erkennen und korrigieren |
|
||||
|
||||
### Schritt 2: Begradigung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
@@ -202,7 +219,7 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| `POST` | `/sessions/{id}/deskew/manual` | Manuelle Winkelkorrektur |
|
||||
| `POST` | `/sessions/{id}/ground-truth/deskew` | Ground Truth speichern |
|
||||
|
||||
### Schritt 2: Entzerrung
|
||||
### Schritt 3: Entzerrung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
@@ -211,7 +228,15 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| `POST` | `/sessions/{id}/adjust-combined` | Kombinierte Rotation + Shear Feinabstimmung |
|
||||
| `POST` | `/sessions/{id}/ground-truth/dewarp` | Ground Truth speichern |
|
||||
|
||||
### Schritt 3: Spalten
|
||||
### Schritt 4: Zuschneiden
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/crop` | Automatischer Content-Crop |
|
||||
| `POST` | `/sessions/{id}/crop/manual` | Manueller Crop (Prozent-Koordinaten) |
|
||||
| `POST` | `/sessions/{id}/crop/skip` | Crop ueberspringen |
|
||||
|
||||
### Schritt 5: Spalten
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
@@ -219,7 +244,7 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| `POST` | `/sessions/{id}/columns/manual` | Manuelle Spalten-Definition |
|
||||
| `POST` | `/sessions/{id}/ground-truth/columns` | Ground Truth speichern |
|
||||
|
||||
### Schritt 4: Zeilen
|
||||
### Schritt 6: Zeilen
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
@@ -228,7 +253,7 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| `POST` | `/sessions/{id}/ground-truth/rows` | Ground Truth speichern |
|
||||
| `GET` | `/sessions/{id}/ground-truth/rows` | Ground Truth abrufen |
|
||||
|
||||
### Schritt 5: Worterkennung
|
||||
### Schritt 7: Worterkennung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
@@ -236,14 +261,14 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| `POST` | `/sessions/{id}/ground-truth/words` | Ground Truth speichern |
|
||||
| `GET` | `/sessions/{id}/ground-truth/words` | Ground Truth abrufen |
|
||||
|
||||
### Schritt 6: Korrektur
|
||||
### Schritt 8: Korrektur
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/llm-review?stream=true` | SSE-Stream Korrektur starten |
|
||||
| `POST` | `/sessions/{id}/llm-review/apply` | Ausgewaehlte Korrekturen speichern |
|
||||
|
||||
### Schritt 7: Rekonstruktion
|
||||
### Schritt 9: Rekonstruktion
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
@@ -253,12 +278,66 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| `GET` | `/sessions/{id}/reconstruction/export/docx` | DOCX-Export (python-docx) |
|
||||
| `POST` | `/sessions/{id}/reconstruction/detect-images` | Bildbereiche per VLM erkennen |
|
||||
| `POST` | `/sessions/{id}/reconstruction/generate-image` | Bild per mflux generieren |
|
||||
| `POST` | `/sessions/{id}/reconstruction/validate` | Validierung speichern (Step 8) |
|
||||
| `POST` | `/sessions/{id}/reconstruction/validate` | Validierung speichern (Step 10) |
|
||||
| `GET` | `/sessions/{id}/reconstruction/validation` | Validierungsdaten abrufen |
|
||||
|
||||
---
|
||||
|
||||
## Schritt 2: Entzerrung/Dewarp (Detail)
|
||||
## Schritt 4: Zuschneiden/Crop (Detail)
|
||||
|
||||
### Warum Crop nach Deskew/Dewarp?
|
||||
|
||||
In frueheren Versionen lief Crop als Schritt 2 (vor Deskew). Das fuehrte zu Problemen:
|
||||
|
||||
- **Schiefes Bild**: `boundingRect` einer schiefen Seite schliesst viel Scanner-Hintergrund ein
|
||||
- **Buchscans**: Otsu-Binarisierung versagt bei weiss-auf-weiss (Seite auf weissem Scanner)
|
||||
- **Buchruecken**: Gradueller Schatten-Uebergang wird nicht als Kante erkannt
|
||||
|
||||
**Loesung (v4.1):** Crop laeuft jetzt nach Dewarp — das Bild ist dann gerade.
|
||||
|
||||
### Algorithmus: Content-basierte 4-Kanten-Erkennung
|
||||
|
||||
Datei: `page_crop.py`
|
||||
|
||||
```
|
||||
Input: Entzerrtes BGR-Bild
|
||||
│
|
||||
├─ Adaptive Threshold (Gauss, blockSize=51)
|
||||
│ → binary (Text=255, Hintergrund=0)
|
||||
│
|
||||
├─ Linker Rand (Buchruecken-Schatten):
|
||||
│ 1. Grauwert-Spaltenmittel in linken 25%
|
||||
│ 2. Glaetten mit Boxcar-Kernel
|
||||
│ 3. Transition hell→dunkel finden (> 60% des Helligkeitsbereichs)
|
||||
│ 4. Fallback: Binaere Vertikal-Projektion
|
||||
│
|
||||
├─ Rechter Rand: Binaere Vertikal-Projektion (letzte Ink-Spalte)
|
||||
│
|
||||
├─ Oben/Unten: Binaere Horizontal-Projektion (erste/letzte Ink-Zeile)
|
||||
│
|
||||
├─ Rausch-Filter: Runs < 0.5% der Dimension ignorieren
|
||||
│
|
||||
├─ Sanity-Checks:
|
||||
│ - Mindestens eine Kante > 2% Border
|
||||
│ - Crop-Flaeche >= 40% des Originals
|
||||
│
|
||||
└─ Crop + konfigurierbarer Rand (default 1%)
|
||||
```
|
||||
|
||||
### Vergleich alt vs. neu
|
||||
|
||||
| Eigenschaft | Alt (Otsu + Kontur) | Neu (Content-basiert) |
|
||||
|-------------|--------------------|-----------------------|
|
||||
| Binarisierung | Otsu (global) | Adaptive Threshold |
|
||||
| Methode | Groesste Kontur → boundingRect | 4-Kanten Ink-Projektion |
|
||||
| Buchruecken | Nicht erkannt | Schatten-Gradient-Erkennung |
|
||||
| Weiss-auf-weiss | Versagt | Funktioniert (adaptive) |
|
||||
| Format-Matching | A4/Letter erzwungen | Kein Format-Matching (Content-Bounds) |
|
||||
| Position in Pipeline | Vor Deskew (Schritt 2) | Nach Dewarp (Schritt 4) |
|
||||
|
||||
---
|
||||
|
||||
## Schritt 3: Entzerrung/Dewarp (Detail)
|
||||
|
||||
### Algorithmus: Vertikalkanten-Drift
|
||||
|
||||
@@ -311,7 +390,7 @@ Response: {"method_used": "manual_combined", "shear_degrees": -0.45, "dewarped_i
|
||||
|
||||
---
|
||||
|
||||
## Schritt 3: Spaltenerkennung (Detail)
|
||||
## Schritt 5: Spaltenerkennung (Detail)
|
||||
|
||||
### Algorithmus: `detect_column_geometry()`
|
||||
|
||||
@@ -417,7 +496,7 @@ min_real_col_w = max(20, int(content_w * 0.03))
|
||||
|
||||
---
|
||||
|
||||
## Schritt 4: Zeilenerkennung (Detail)
|
||||
## Schritt 6: Zeilenerkennung (Detail)
|
||||
|
||||
### Algorithmus: `detect_row_geometry()`
|
||||
|
||||
@@ -447,7 +526,7 @@ def _heal_row_gaps(rows, top_bound, bottom_bound):
|
||||
|
||||
---
|
||||
|
||||
## Schritt 5: Worterkennung — Hybrid-Grid (Detail)
|
||||
## Schritt 7: Worterkennung — Hybrid-Grid (Detail)
|
||||
|
||||
### Algorithmus: `build_cell_grid_v2()`
|
||||
|
||||
@@ -554,7 +633,7 @@ Eingabe: ocr_img, column_regions, row_geometries
|
||||
|
||||
---
|
||||
|
||||
## Schritt 6: Korrektur (Detail)
|
||||
## Schritt 8: Korrektur (Detail)
|
||||
|
||||
### Korrektur-Engine
|
||||
|
||||
@@ -611,7 +690,7 @@ Change-Format:
|
||||
|
||||
---
|
||||
|
||||
## Schritt 7: Rekonstruktion (Detail)
|
||||
## Schritt 9: Rekonstruktion (Detail)
|
||||
|
||||
Zwei Modi verfuegbar:
|
||||
|
||||
|
||||
@@ -1207,7 +1207,7 @@ async def get_column_ground_truth(session_id: str):
|
||||
|
||||
|
||||
async def _get_columns_overlay(session_id: str) -> Response:
|
||||
"""Generate dewarped image with column borders drawn on it."""
|
||||
"""Generate cropped (or dewarped) image with column borders drawn on it."""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
@@ -1216,12 +1216,14 @@ async def _get_columns_overlay(session_id: str) -> Response:
|
||||
if not column_result or not column_result.get("columns"):
|
||||
raise HTTPException(status_code=404, detail="No column data available")
|
||||
|
||||
# Load dewarped image
|
||||
dewarped_png = await get_session_image(session_id, "dewarped")
|
||||
if not dewarped_png:
|
||||
raise HTTPException(status_code=404, detail="Dewarped image not available")
|
||||
# Load cropped image (preferred) or dewarped as fallback
|
||||
base_png = await get_session_image(session_id, "cropped")
|
||||
if not base_png:
|
||||
base_png = await get_session_image(session_id, "dewarped")
|
||||
if not base_png:
|
||||
raise HTTPException(status_code=404, detail="No base image available (cropped/dewarped)")
|
||||
|
||||
arr = np.frombuffer(dewarped_png, dtype=np.uint8)
|
||||
arr = np.frombuffer(base_png, dtype=np.uint8)
|
||||
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
|
||||
if img is None:
|
||||
raise HTTPException(status_code=500, detail="Failed to decode image")
|
||||
@@ -2692,7 +2694,7 @@ async def reprocess_session(session_id: str, request: Request):
|
||||
|
||||
|
||||
async def _get_rows_overlay(session_id: str) -> Response:
|
||||
"""Generate dewarped image with row bands drawn on it."""
|
||||
"""Generate cropped (or dewarped) image with row bands drawn on it."""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
@@ -2701,12 +2703,14 @@ async def _get_rows_overlay(session_id: str) -> Response:
|
||||
if not row_result or not row_result.get("rows"):
|
||||
raise HTTPException(status_code=404, detail="No row data available")
|
||||
|
||||
# Load dewarped image
|
||||
dewarped_png = await get_session_image(session_id, "dewarped")
|
||||
if not dewarped_png:
|
||||
raise HTTPException(status_code=404, detail="Dewarped image not available")
|
||||
# Load cropped image (preferred) or dewarped as fallback
|
||||
base_png = await get_session_image(session_id, "cropped")
|
||||
if not base_png:
|
||||
base_png = await get_session_image(session_id, "dewarped")
|
||||
if not base_png:
|
||||
raise HTTPException(status_code=404, detail="No base image available (cropped/dewarped)")
|
||||
|
||||
arr = np.frombuffer(dewarped_png, dtype=np.uint8)
|
||||
arr = np.frombuffer(base_png, dtype=np.uint8)
|
||||
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
|
||||
if img is None:
|
||||
raise HTTPException(status_code=500, detail="Failed to decode image")
|
||||
@@ -2753,7 +2757,7 @@ async def _get_rows_overlay(session_id: str) -> Response:
|
||||
|
||||
|
||||
async def _get_words_overlay(session_id: str) -> Response:
|
||||
"""Generate dewarped image with cell grid drawn on it."""
|
||||
"""Generate cropped (or dewarped) image with cell grid drawn on it."""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
@@ -2767,12 +2771,14 @@ async def _get_words_overlay(session_id: str) -> Response:
|
||||
if not cells and not word_result.get("entries"):
|
||||
raise HTTPException(status_code=404, detail="No word data available")
|
||||
|
||||
# Load dewarped image
|
||||
dewarped_png = await get_session_image(session_id, "dewarped")
|
||||
if not dewarped_png:
|
||||
raise HTTPException(status_code=404, detail="Dewarped image not available")
|
||||
# Load cropped image (preferred) or dewarped as fallback
|
||||
base_png = await get_session_image(session_id, "cropped")
|
||||
if not base_png:
|
||||
base_png = await get_session_image(session_id, "dewarped")
|
||||
if not base_png:
|
||||
raise HTTPException(status_code=404, detail="No base image available (cropped/dewarped)")
|
||||
|
||||
arr = np.frombuffer(dewarped_png, dtype=np.uint8)
|
||||
arr = np.frombuffer(base_png, dtype=np.uint8)
|
||||
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
|
||||
if img is None:
|
||||
raise HTTPException(status_code=500, detail="Failed to decode image")
|
||||
|
||||
327
klausur-service/backend/tests/test_page_crop.py
Normal file
327
klausur-service/backend/tests/test_page_crop.py
Normal file
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
Tests for page_crop.py — content-based crop algorithm.
|
||||
|
||||
Tests cover:
|
||||
- Edge detection via ink projections
|
||||
- Spine shadow detection for book scans
|
||||
- Narrow run filtering
|
||||
- Paper format detection
|
||||
- Sanity checks (min area, min border)
|
||||
- End-to-end crop on synthetic images
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from page_crop import (
|
||||
detect_and_crop_page,
|
||||
_detect_format,
|
||||
_detect_edge_projection,
|
||||
_detect_left_edge_shadow,
|
||||
_filter_narrow_runs,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper: create synthetic images
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_white_image(h: int, w: int) -> np.ndarray:
|
||||
"""Create a white BGR image."""
|
||||
return np.full((h, w, 3), 255, dtype=np.uint8)
|
||||
|
||||
|
||||
def _make_image_with_content(
|
||||
h: int, w: int,
|
||||
content_rect: tuple, # (y1, y2, x1, x2)
|
||||
bg_color: int = 255,
|
||||
content_color: int = 0,
|
||||
) -> np.ndarray:
|
||||
"""Create an image with a dark content rectangle on a light background."""
|
||||
img = np.full((h, w, 3), bg_color, dtype=np.uint8)
|
||||
y1, y2, x1, x2 = content_rect
|
||||
img[y1:y2, x1:x2] = content_color
|
||||
return img
|
||||
|
||||
|
||||
def _make_book_scan(h: int = 1000, w: int = 800) -> np.ndarray:
|
||||
"""Create a synthetic book scan with spine shadow on the left.
|
||||
|
||||
Left 10%: gradient from dark (50) to white (255)
|
||||
Top 5%: white (empty scanner border)
|
||||
Bottom 5%: white (empty scanner border)
|
||||
Center: text-like content (dark pixels scattered)
|
||||
"""
|
||||
img = np.full((h, w, 3), 255, dtype=np.uint8)
|
||||
|
||||
# Spine shadow: left 10% has gradient from dark to bright
|
||||
shadow_w = w // 10
|
||||
for x in range(shadow_w):
|
||||
brightness = int(50 + (255 - 50) * x / shadow_w)
|
||||
img[:, x] = brightness
|
||||
|
||||
# Content area: scatter some dark pixels (simulate text)
|
||||
content_top = h // 20 # 5% top margin
|
||||
content_bottom = h - h // 20 # 5% bottom margin
|
||||
content_left = shadow_w + w // 20 # past shadow + small margin
|
||||
content_right = w - w // 20 # 5% right margin
|
||||
|
||||
rng = np.random.RandomState(42)
|
||||
for _ in range(500):
|
||||
y = rng.randint(content_top, content_bottom)
|
||||
x = rng.randint(content_left, content_right)
|
||||
# Small text-like blob
|
||||
y2 = min(y + 3, h)
|
||||
x2 = min(x + 10, w)
|
||||
img[y:y2, x:x2] = 20
|
||||
|
||||
return img
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: _filter_narrow_runs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestFilterNarrowRuns:
    """Unit tests for _filter_narrow_runs: short True-runs in a boolean
    mask (length < min_run) must be cleared, longer runs kept intact."""

    def test_removes_short_runs(self):
        # Runs of length 2 and 1 both fall below min_run=3 -> all cleared.
        mask = np.array([False, True, True, False, False, True, False])
        assert not _filter_narrow_runs(mask, min_run=3).any()

    def test_keeps_long_runs(self):
        # A 4-long run survives a min_run=3 filter unchanged.
        mask = np.array([False, True, True, True, True, False])
        np.testing.assert_array_equal(
            _filter_narrow_runs(mask, min_run=3),
            np.array([False, True, True, True, True, False]),
        )

    def test_min_run_1_keeps_all(self):
        # min_run=1 is a no-op: every run qualifies.
        mask = np.array([True, False, True])
        np.testing.assert_array_equal(_filter_narrow_runs(mask, min_run=1), mask)

    def test_empty_mask(self):
        # Empty input must not raise and stays empty.
        result = _filter_narrow_runs(np.array([], dtype=bool), min_run=5)
        assert len(result) == 0

    def test_mixed_runs(self):
        # Run of 1 at [0] and run of 2 at [8:10] are cleared;
        # only the 5-long run at [2:7] survives.
        mask = np.array([True, False, True, True, True, True, True, False, True, True])
        expected = np.array(
            [False, False, True, True, True, True, True, False, False, False]
        )
        np.testing.assert_array_equal(_filter_narrow_runs(mask, min_run=3), expected)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: _detect_format
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDetectFormat:
    """Unit tests for _detect_format: (width, height) -> (format name,
    confidence), orientation-agnostic paper-format matching."""

    def test_a4_portrait(self):
        name, score = _detect_format(210, 297)
        assert name == "A4"
        assert score > 0.8

    def test_a4_landscape(self):
        # Same aspect ratio rotated 90 degrees must still match A4.
        name, score = _detect_format(297, 210)
        assert name == "A4"
        assert score > 0.8

    def test_letter(self):
        name, score = _detect_format(850, 1100)
        assert name == "Letter"
        assert score > 0.5

    def test_unknown_square(self):
        # Aspect ratio 1.0 matches no standard paper format well.
        name, score = _detect_format(100, 100)
        assert name == "unknown" or score < 0.5

    def test_zero_dimensions(self):
        # Degenerate input: must report unknown with zero confidence.
        name, score = _detect_format(0, 100)
        assert name == "unknown"
        assert score == 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: _detect_edge_projection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDetectEdgeProjection:
    """Unit tests for _detect_edge_projection: locate the first/last
    ink (255) column or row of a binary image via axis projection."""

    def test_finds_first_ink_column(self):
        # Ink occupies columns 50..179 -> first ink column is 50.
        ink = np.zeros((100, 200), dtype=np.uint8)
        ink[10:90, 50:180] = 255
        assert _detect_edge_projection(ink, axis=0, from_start=True, dim=200) == 50

    def test_finds_last_ink_column(self):
        # Slice 50:180 is half-open, so the last ink column is 179.
        ink = np.zeros((100, 200), dtype=np.uint8)
        ink[10:90, 50:180] = 255
        assert _detect_edge_projection(ink, axis=0, from_start=False, dim=200) == 179

    def test_finds_first_ink_row(self):
        ink = np.zeros((200, 100), dtype=np.uint8)
        ink[30:170, 10:90] = 255
        assert _detect_edge_projection(ink, axis=1, from_start=True, dim=200) == 30

    def test_finds_last_ink_row(self):
        ink = np.zeros((200, 100), dtype=np.uint8)
        ink[30:170, 10:90] = 255
        assert _detect_edge_projection(ink, axis=1, from_start=False, dim=200) == 169

    def test_empty_image_returns_boundary(self):
        # No ink at all: edges collapse to the image boundaries (0 / dim).
        blank = np.zeros((100, 100), dtype=np.uint8)
        assert _detect_edge_projection(blank, axis=0, from_start=True, dim=100) == 0
        assert _detect_edge_projection(blank, axis=0, from_start=False, dim=100) == 100
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: _detect_left_edge_shadow
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDetectLeftEdgeShadow:
    """Unit tests for _detect_left_edge_shadow: find the left page edge
    either via the spine-shadow brightness gradient or, as a fallback,
    via the binary ink projection."""

    def test_detects_shadow_gradient(self):
        # Synthetic page whose left 15% darkens gradually (spine shadow).
        h, w = 500, 400
        luma = np.full((h, w), 255, dtype=np.uint8)
        ink = np.zeros((h, w), dtype=np.uint8)

        shadow_w = w * 15 // 100
        for col in range(shadow_w):
            luma[:, col] = int(50 + (255 - 50) * col / shadow_w)

        # Ink content begins only after the shadow zone.
        ink[:, shadow_w + 10:w - 10] = 255

        edge = _detect_left_edge_shadow(luma, ink, w, h)
        # The 60% brightness threshold triggers somewhere inside the
        # transition zone, i.e. before the nominal shadow boundary.
        assert 0 < edge < shadow_w + 20

    def test_no_shadow_uses_binary_fallback(self):
        # Flat gray image: shadow range too small -> projection fallback.
        h, w = 400, 400
        luma = np.full((h, w), 200, dtype=np.uint8)
        ink = np.zeros((h, w), dtype=np.uint8)
        # Content block large enough to survive the narrow-run filter.
        ink[50:350, 80:380] = 255

        edge = _detect_left_edge_shadow(luma, ink, w, h)
        # Fallback should locate the content start near x=80.
        assert edge <= 85
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: detect_and_crop_page (end-to-end)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDetectAndCropPage:
    """End-to-end tests for detect_and_crop_page on synthetic images.

    These tests pin the algorithm's tuning thresholds observed in its
    contract: borders under ~2% are not cropped, a crop keeping less
    than 40% of the area is rejected, and a ~10%-wide spine shadow must
    be detected as left border. Tolerance windows are intentionally
    wide because the adaptive threshold can shift edges by a few pixels.
    """

    def test_no_crop_needed_all_content(self):
        """Image that is all content — no borders to crop."""
        img = np.full((100, 80, 3), 40, dtype=np.uint8)  # Dark content everywhere
        cropped, result = detect_and_crop_page(img)
        # Should return original (all borders < 2%)
        assert not result["crop_applied"]
        assert result["cropped_size"] == {"width": 80, "height": 100}

    def test_crops_white_borders(self):
        """Image with wide white borders around dark content."""
        h, w = 400, 300
        # Content rect (80, 320, 60, 240) -> 240x180 dark block, white elsewhere.
        img = _make_image_with_content(h, w, (80, 320, 60, 240))

        cropped, result = detect_and_crop_page(img)
        assert result["crop_applied"]
        # Cropped size should be close to the content area (with margin)
        assert result["cropped_size"]["width"] < w
        assert result["cropped_size"]["height"] < h
        # Content should be roughly 180x240 + margins (adaptive threshold may widen slightly)
        assert 160 <= result["cropped_size"]["width"] <= 260
        assert 220 <= result["cropped_size"]["height"] <= 300

    def test_book_scan_detects_spine_shadow(self):
        """Synthetic book scan with spine shadow on left."""
        img = _make_book_scan(1000, 800)
        cropped, result = detect_and_crop_page(img)

        # Should crop the spine shadow area
        left_border = result["border_fractions"]["left"]
        # Spine shadow is ~10% of width, plus some margin
        assert left_border > 0.05  # At least 5% left border detected

    def test_sanity_check_too_small_crop(self):
        """If detected content area is too small, skip crop."""
        h, w = 500, 500
        # Tiny content area (5x5 pixels) — should fail sanity check
        img = _make_white_image(h, w)
        # Add tiny dark spot
        img[248:253, 248:253] = 0

        cropped, result = detect_and_crop_page(img)
        # Should either not crop or crop is too small (< 40%)
        # NOTE(review): this only constrains the case where a crop WAS
        # applied; a skipped crop passes trivially — intended behavior.
        if result["crop_applied"]:
            crop_area = result["cropped_size"]["width"] * result["cropped_size"]["height"]
            assert crop_area >= 0.4 * h * w

    def test_crop_preserves_content(self):
        """Verify that content is preserved after cropping."""
        h, w = 300, 200
        img = _make_image_with_content(h, w, (50, 250, 40, 160))
        cropped, result = detect_and_crop_page(img)

        if result["crop_applied"]:
            # Cropped image should contain dark pixels (content)
            gray = np.mean(cropped, axis=2)
            assert np.min(gray) < 50  # Content is dark

    def test_result_structure(self):
        """Verify all expected keys are present in result dict."""
        img = _make_white_image(100, 100)
        _, result = detect_and_crop_page(img)

        assert "crop_applied" in result
        assert "original_size" in result
        assert "cropped_size" in result
        assert "border_fractions" in result
        assert "detected_format" in result
        assert "format_confidence" in result
        assert "aspect_ratio" in result

    def test_margin_parameter(self):
        """Custom margin_frac should affect crop bounds."""
        h, w = 400, 300
        img = _make_image_with_content(h, w, (80, 320, 60, 240))

        # Same image cropped with a tight and a generous margin.
        _, result_small = detect_and_crop_page(img, margin_frac=0.005)
        _, result_large = detect_and_crop_page(img, margin_frac=0.05)

        if result_small["crop_applied"] and result_large["crop_applied"]:
            # Larger margin should produce a larger crop
            small_area = result_small["cropped_size"]["width"] * result_small["cropped_size"]["height"]
            large_area = result_large["cropped_size"]["width"] * result_large["cropped_size"]["height"]
            assert large_area >= small_area

    def test_crop_rect_pct_values(self):
        """crop_rect_pct values should be in 0-100 range."""
        h, w = 400, 300
        img = _make_image_with_content(h, w, (80, 320, 60, 240))
        _, result = detect_and_crop_page(img)

        # x/y may be 0 (crop starting at the border); width/height must
        # be strictly positive percentages.
        if result["crop_applied"] and result["crop_rect_pct"]:
            pct = result["crop_rect_pct"]
            assert 0 <= pct["x"] <= 100
            assert 0 <= pct["y"] <= 100
            assert 0 < pct["width"] <= 100
            assert 0 < pct["height"] <= 100
||||
Reference in New Issue
Block a user