klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
49 lines
1.1 KiB
Python
49 lines
1.1 KiB
Python
"""
|
|
File Processor - data models and enums.

Types for document processing: file types, modes, results.
|
|
"""
|
|
|
|
from typing import List, Dict, Any, Tuple
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
|
|
|
|
class FileType(str, Enum):
    """Supported file types.

    Members are also ``str`` instances, so each one compares equal to
    its plain string value (e.g. ``FileType.PDF == "pdf"``).
    """

    PDF = "pdf"
    IMAGE = "image"
    DOCX = "docx"
    DOC = "doc"
    TXT = "txt"
    UNKNOWN = "unknown"
|
|
|
|
|
|
class ProcessingMode(str, Enum):
    """Processing modes.

    Members are also ``str`` instances, so each one compares equal to
    its plain string value.
    """

    OCR_HANDWRITING = "ocr_handwriting"  # handwriting recognition
    OCR_PRINTED = "ocr_printed"  # printed text
    TEXT_EXTRACT = "text_extract"  # text extraction (PDF/DOCX)
    MIXED = "mixed"  # combines OCR + text extraction
|
|
|
|
|
|
@dataclass
class ProcessedRegion:
    """A recognized text region.

    Attributes:
        text: Text of the region.
        confidence: Confidence value for the region (range not defined
            here; confirm against the producer).
        bbox: Bounding box as ``(x1, y1, x2, y2)``.
        page: Page the region belongs to; defaults to 1.
    """

    text: str
    confidence: float
    bbox: Tuple[int, int, int, int]  # x1, y1, x2, y2
    page: int = 1
|
|
|
|
|
|
@dataclass
class ProcessingResult:
    """Result of document processing.

    All fields are required; none has a default.

    Attributes:
        text: Text of the processed document.
        confidence: Confidence value for the result (range not defined
            here; confirm against the producer).
        regions: The recognized text regions.
        page_count: Number of pages.
        file_type: File type of the processed document.
        processing_mode: Processing mode associated with this result.
        metadata: Free-form key/value data; the schema is not defined here.
    """

    text: str
    confidence: float
    regions: List[ProcessedRegion]
    page_count: int
    file_type: FileType
    processing_mode: ProcessingMode
    metadata: Dict[str, Any]
|