[split-required] [guardrail-change] Enforce 500 LOC budget across all services

Install LOC guardrails (check-loc.sh, architecture.md, pre-commit hook)
and split all 44 files exceeding 500 LOC into domain-focused modules:

- consent-service (Go): models, handlers, services, database splits
- backend-core (Python): security_api, rbac_api, pdf_service, auth splits
- admin-core (TypeScript): 5 page.tsx + sidebar extractions
- pitch-deck (TypeScript): 6 slides, 3 UI components, engine.ts splits
- voice-service (Python): enhanced_task_orchestrator split

Result: 0 violations, 36 exempted (pipeline, tests, pure-data files).
Go build verified clean. No behavior changes — pure structural splits.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-27 00:09:30 +02:00
parent 5ef039a6bc
commit 92c86ec6ba
162 changed files with 23853 additions and 23034 deletions

View File

@@ -5,7 +5,7 @@ Hybrid authentication supporting both Keycloak and local JWT tokens.
"""
from .keycloak_auth import (
# Config
# Config & Models
KeycloakConfig,
KeycloakUser,
@@ -18,7 +18,9 @@ from .keycloak_auth import (
TokenExpiredError,
TokenInvalidError,
KeycloakConfigError,
)
from .dependencies import (
# Factory functions
get_keycloak_config_from_env,
get_authenticator,
@@ -30,7 +32,7 @@ from .keycloak_auth import (
)
__all__ = [
# Config
# Config & Models
"KeycloakConfig",
"KeycloakUser",

View File

@@ -0,0 +1,164 @@
"""
FastAPI Authentication Dependencies and Factory Functions.
Provides:
- get_keycloak_config_from_env(): Create config from env vars
- get_authenticator(): Create HybridAuthenticator instance
- get_auth(): Global authenticator singleton
- get_current_user(): FastAPI dependency for authentication
- require_role(): FastAPI dependency factory for role-based access
"""
import os
import logging
from typing import Optional, Dict, Any
from fastapi import Request, HTTPException, Depends
from .keycloak_auth import (
KeycloakConfig,
KeycloakConfigError,
HybridAuthenticator,
TokenExpiredError,
TokenInvalidError,
)
logger = logging.getLogger(__name__)
# =============================================
# FACTORY FUNCTIONS
# =============================================
def get_keycloak_config_from_env() -> Optional[KeycloakConfig]:
"""
Create KeycloakConfig from environment variables.
Required env vars:
- KEYCLOAK_SERVER_URL: e.g., https://keycloak.breakpilot.app
- KEYCLOAK_REALM: e.g., breakpilot
- KEYCLOAK_CLIENT_ID: e.g., breakpilot-backend
Optional:
- KEYCLOAK_CLIENT_SECRET: For confidential clients
- KEYCLOAK_VERIFY_SSL: Default true
"""
server_url = os.environ.get("KEYCLOAK_SERVER_URL")
realm = os.environ.get("KEYCLOAK_REALM")
client_id = os.environ.get("KEYCLOAK_CLIENT_ID")
if not all([server_url, realm, client_id]):
logger.info("Keycloak not configured, using local JWT only")
return None
return KeycloakConfig(
server_url=server_url,
realm=realm,
client_id=client_id,
client_secret=os.environ.get("KEYCLOAK_CLIENT_SECRET"),
verify_ssl=os.environ.get("KEYCLOAK_VERIFY_SSL", "true").lower() == "true"
)
def get_authenticator() -> HybridAuthenticator:
"""
Get configured authenticator instance.
Uses environment variables to determine configuration.
"""
keycloak_config = get_keycloak_config_from_env()
# JWT_SECRET is required - no default fallback in production
jwt_secret = os.environ.get("JWT_SECRET")
environment = os.environ.get("ENVIRONMENT", "development")
if not jwt_secret and environment == "production":
raise KeycloakConfigError(
"JWT_SECRET environment variable is required in production"
)
return HybridAuthenticator(
keycloak_config=keycloak_config,
local_jwt_secret=jwt_secret,
environment=environment
)
# =============================================
# FASTAPI DEPENDENCY
# =============================================
# Global authenticator instance (lazy-initialized)
_authenticator: Optional[HybridAuthenticator] = None
def get_auth() -> HybridAuthenticator:
"""Get or create global authenticator."""
global _authenticator
if _authenticator is None:
_authenticator = get_authenticator()
return _authenticator
async def get_current_user(request: Request) -> Dict[str, Any]:
"""
FastAPI dependency to get current authenticated user.
Usage:
@app.get("/api/protected")
async def protected_endpoint(user: dict = Depends(get_current_user)):
return {"user_id": user["user_id"]}
"""
auth_header = request.headers.get("authorization", "")
if not auth_header.startswith("Bearer "):
# Check for development mode
environment = os.environ.get("ENVIRONMENT", "development")
if environment == "development":
# Return demo user in development without token
return {
"user_id": "10000000-0000-0000-0000-000000000024",
"email": "demo@breakpilot.app",
"role": "admin",
"realm_roles": ["admin"],
"tenant_id": "a0000000-0000-0000-0000-000000000001",
"auth_method": "development_bypass"
}
raise HTTPException(status_code=401, detail="Missing authorization header")
token = auth_header.split(" ")[1]
try:
auth = get_auth()
return await auth.validate_token(token)
except TokenExpiredError:
raise HTTPException(status_code=401, detail="Token expired")
except TokenInvalidError as e:
raise HTTPException(status_code=401, detail=str(e))
except Exception as e:
logger.error(f"Authentication failed: {e}")
raise HTTPException(status_code=401, detail="Authentication failed")
async def require_role(required_role: str):
"""
FastAPI dependency factory for role-based access.
Usage:
@app.get("/api/admin-only")
async def admin_endpoint(user: dict = Depends(require_role("admin"))):
return {"message": "Admin access granted"}
"""
async def role_checker(user: dict = Depends(get_current_user)) -> dict:
user_role = user.get("role", "user")
realm_roles = user.get("realm_roles", [])
if user_role == required_role or required_role in realm_roles:
return user
raise HTTPException(
status_code=403,
detail=f"Role '{required_role}' required"
)
return role_checker

View File

@@ -375,141 +375,12 @@ class HybridAuthenticator:
await self.keycloak_auth.close()
# =============================================
# FACTORY FUNCTIONS
# =============================================
def get_keycloak_config_from_env() -> Optional[KeycloakConfig]:
"""
Create KeycloakConfig from environment variables.
Required env vars:
- KEYCLOAK_SERVER_URL: e.g., https://keycloak.breakpilot.app
- KEYCLOAK_REALM: e.g., breakpilot
- KEYCLOAK_CLIENT_ID: e.g., breakpilot-backend
Optional:
- KEYCLOAK_CLIENT_SECRET: For confidential clients
- KEYCLOAK_VERIFY_SSL: Default true
"""
server_url = os.environ.get("KEYCLOAK_SERVER_URL")
realm = os.environ.get("KEYCLOAK_REALM")
client_id = os.environ.get("KEYCLOAK_CLIENT_ID")
if not all([server_url, realm, client_id]):
logger.info("Keycloak not configured, using local JWT only")
return None
return KeycloakConfig(
server_url=server_url,
realm=realm,
client_id=client_id,
client_secret=os.environ.get("KEYCLOAK_CLIENT_SECRET"),
verify_ssl=os.environ.get("KEYCLOAK_VERIFY_SSL", "true").lower() == "true"
)
def get_authenticator() -> HybridAuthenticator:
"""
Get configured authenticator instance.
Uses environment variables to determine configuration.
"""
keycloak_config = get_keycloak_config_from_env()
# JWT_SECRET is required - no default fallback in production
jwt_secret = os.environ.get("JWT_SECRET")
environment = os.environ.get("ENVIRONMENT", "development")
if not jwt_secret and environment == "production":
raise KeycloakConfigError(
"JWT_SECRET environment variable is required in production"
)
return HybridAuthenticator(
keycloak_config=keycloak_config,
local_jwt_secret=jwt_secret,
environment=environment
)
# =============================================
# FASTAPI DEPENDENCY
# =============================================
from fastapi import Request, HTTPException, Depends
# Global authenticator instance (lazy-initialized)
_authenticator: Optional[HybridAuthenticator] = None
def get_auth() -> HybridAuthenticator:
"""Get or create global authenticator."""
global _authenticator
if _authenticator is None:
_authenticator = get_authenticator()
return _authenticator
async def get_current_user(request: Request) -> Dict[str, Any]:
"""
FastAPI dependency to get current authenticated user.
Usage:
@app.get("/api/protected")
async def protected_endpoint(user: dict = Depends(get_current_user)):
return {"user_id": user["user_id"]}
"""
auth_header = request.headers.get("authorization", "")
if not auth_header.startswith("Bearer "):
# Check for development mode
environment = os.environ.get("ENVIRONMENT", "development")
if environment == "development":
# Return demo user in development without token
return {
"user_id": "10000000-0000-0000-0000-000000000024",
"email": "demo@breakpilot.app",
"role": "admin",
"realm_roles": ["admin"],
"tenant_id": "a0000000-0000-0000-0000-000000000001",
"auth_method": "development_bypass"
}
raise HTTPException(status_code=401, detail="Missing authorization header")
token = auth_header.split(" ")[1]
try:
auth = get_auth()
return await auth.validate_token(token)
except TokenExpiredError:
raise HTTPException(status_code=401, detail="Token expired")
except TokenInvalidError as e:
raise HTTPException(status_code=401, detail=str(e))
except Exception as e:
logger.error(f"Authentication failed: {e}")
raise HTTPException(status_code=401, detail="Authentication failed")
async def require_role(required_role: str):
"""
FastAPI dependency factory for role-based access.
Usage:
@app.get("/api/admin-only")
async def admin_endpoint(user: dict = Depends(require_role("admin"))):
return {"message": "Admin access granted"}
"""
async def role_checker(user: dict = Depends(get_current_user)) -> dict:
user_role = user.get("role", "user")
realm_roles = user.get("realm_roles", [])
if user_role == required_role or required_role in realm_roles:
return user
raise HTTPException(
status_code=403,
detail=f"Role '{required_role}' required"
)
return role_checker
# Re-export factory functions and FastAPI dependencies from dependencies module
# for backward compatibility with existing imports
from .dependencies import ( # noqa: E402, F401
get_keycloak_config_from_env,
get_authenticator,
get_auth,
get_current_user,
require_role,
)

View File

@@ -18,6 +18,7 @@ from fastapi.middleware.cors import CORSMiddleware
# ---------------------------------------------------------------------------
from auth_api import router as auth_router
from rbac_api import router as rbac_router
from rbac_teachers_api import router as rbac_teachers_router
from notification_api import router as notification_router
from email_template_api import (
router as email_template_router,
@@ -89,9 +90,12 @@ app.add_middleware(RateLimiterMiddleware, valkey_url=VALKEY_URL)
# Auth (proxy to consent-service)
app.include_router(auth_router, prefix="/api")
# RBAC (teacher / role management)
# RBAC (role / assignment / custom-role management)
app.include_router(rbac_router, prefix="/api")
# RBAC Teachers (teacher CRUD, listing, roles per teacher)
app.include_router(rbac_teachers_router, prefix="/api")
# Notifications (proxy to consent-service)
app.include_router(notification_router, prefix="/api")

View File

@@ -1,11 +1,14 @@
"""
RBAC API - Teacher and Role Management Endpoints
RBAC API - Role and Assignment Management Endpoints
Provides API endpoints for:
- Listing all teachers
- Listing all available roles
- Assigning/revoking roles to teachers
- Viewing role assignments per teacher
- Listing all available roles (built-in + custom)
- Assigning/revoking roles to users
- Role summary with assignment counts
- Custom role CRUD
Shared infrastructure (DB pool, Pydantic models, role definitions)
used by rbac_teachers_api.py as well.
Architecture:
- Authentication: Keycloak (when configured) or local JWT
@@ -230,163 +233,6 @@ async def list_available_roles() -> List[RoleInfo]:
]
@router.get("/teachers")
async def list_teachers(user: Dict[str, Any] = Depends(get_current_user)) -> List[TeacherResponse]:
"""List all teachers with their current roles"""
pool = await get_pool()
async with pool.acquire() as conn:
# Get all teachers with their user info
teachers = await conn.fetch("""
SELECT
t.id, t.user_id, t.teacher_code, t.title,
t.first_name, t.last_name, t.is_active,
u.email, u.name
FROM teachers t
JOIN users u ON t.user_id = u.id
WHERE t.school_id = 'a0000000-0000-0000-0000-000000000001'
ORDER BY t.last_name, t.first_name
""")
# Get role assignments for all teachers
role_assignments = await conn.fetch("""
SELECT user_id, role
FROM role_assignments
WHERE tenant_id = 'a0000000-0000-0000-0000-000000000001'
AND revoked_at IS NULL
AND (valid_to IS NULL OR valid_to > NOW())
""")
# Build role lookup
role_lookup: Dict[str, List[str]] = {}
for ra in role_assignments:
uid = str(ra["user_id"])
if uid not in role_lookup:
role_lookup[uid] = []
role_lookup[uid].append(ra["role"])
# Build response
result = []
for t in teachers:
uid = str(t["user_id"])
result.append(TeacherResponse(
id=str(t["id"]),
user_id=uid,
email=t["email"],
name=t["name"] or f"{t['first_name']} {t['last_name']}",
teacher_code=t["teacher_code"],
title=t["title"],
first_name=t["first_name"],
last_name=t["last_name"],
is_active=t["is_active"],
roles=role_lookup.get(uid, [])
))
return result
@router.get("/teachers/{teacher_id}/roles")
async def get_teacher_roles(teacher_id: str, user: Dict[str, Any] = Depends(get_current_user)) -> List[RoleAssignmentResponse]:
"""Get all role assignments for a specific teacher"""
pool = await get_pool()
async with pool.acquire() as conn:
# Get teacher's user_id
teacher = await conn.fetchrow(
"SELECT user_id FROM teachers WHERE id = $1",
teacher_id
)
if not teacher:
raise HTTPException(status_code=404, detail="Teacher not found")
# Get role assignments
assignments = await conn.fetch("""
SELECT id, user_id, role, resource_type, resource_id,
valid_from, valid_to, granted_at, revoked_at
FROM role_assignments
WHERE user_id = $1
ORDER BY granted_at DESC
""", teacher["user_id"])
return [
RoleAssignmentResponse(
id=str(a["id"]),
user_id=str(a["user_id"]),
role=a["role"],
resource_type=a["resource_type"],
resource_id=str(a["resource_id"]),
valid_from=a["valid_from"].isoformat() if a["valid_from"] else None,
valid_to=a["valid_to"].isoformat() if a["valid_to"] else None,
granted_at=a["granted_at"].isoformat() if a["granted_at"] else None,
is_active=a["revoked_at"] is None and (
a["valid_to"] is None or a["valid_to"] > datetime.now(timezone.utc)
)
)
for a in assignments
]
@router.get("/roles/{role}/teachers")
async def get_teachers_by_role(role: str, user: Dict[str, Any] = Depends(get_current_user)) -> List[TeacherResponse]:
"""Get all teachers with a specific role"""
if role not in AVAILABLE_ROLES:
raise HTTPException(status_code=400, detail=f"Unknown role: {role}")
pool = await get_pool()
async with pool.acquire() as conn:
teachers = await conn.fetch("""
SELECT DISTINCT
t.id, t.user_id, t.teacher_code, t.title,
t.first_name, t.last_name, t.is_active,
u.email, u.name
FROM teachers t
JOIN users u ON t.user_id = u.id
JOIN role_assignments ra ON t.user_id = ra.user_id
WHERE ra.role = $1
AND ra.revoked_at IS NULL
AND (ra.valid_to IS NULL OR ra.valid_to > NOW())
AND t.school_id = 'a0000000-0000-0000-0000-000000000001'
ORDER BY t.last_name, t.first_name
""", role)
# Get all roles for these teachers
if teachers:
user_ids = [t["user_id"] for t in teachers]
role_assignments = await conn.fetch("""
SELECT user_id, role
FROM role_assignments
WHERE user_id = ANY($1)
AND revoked_at IS NULL
AND (valid_to IS NULL OR valid_to > NOW())
""", user_ids)
role_lookup: Dict[str, List[str]] = {}
for ra in role_assignments:
uid = str(ra["user_id"])
if uid not in role_lookup:
role_lookup[uid] = []
role_lookup[uid].append(ra["role"])
else:
role_lookup = {}
return [
TeacherResponse(
id=str(t["id"]),
user_id=str(t["user_id"]),
email=t["email"],
name=t["name"] or f"{t['first_name']} {t['last_name']}",
teacher_code=t["teacher_code"],
title=t["title"],
first_name=t["first_name"],
last_name=t["last_name"],
is_active=t["is_active"],
roles=role_lookup.get(str(t["user_id"]), [])
)
for t in teachers
]
@router.post("/assignments")
async def assign_role(assignment: RoleAssignmentCreate, user: Dict[str, Any] = Depends(get_current_user)) -> RoleAssignmentResponse:
"""Assign a role to a user"""
@@ -519,178 +365,6 @@ async def get_role_summary(user: Dict[str, Any] = Depends(get_current_user)) ->
}
# ==========================================
# TEACHER MANAGEMENT ENDPOINTS
# ==========================================
@router.post("/teachers")
async def create_teacher(teacher: TeacherCreate, user: Dict[str, Any] = Depends(get_current_user)) -> TeacherResponse:
"""Create a new teacher with optional initial roles"""
pool = await get_pool()
import uuid
async with pool.acquire() as conn:
# Check if email already exists
existing = await conn.fetchrow(
"SELECT id FROM users WHERE email = $1",
teacher.email
)
if existing:
raise HTTPException(status_code=409, detail="Email already exists")
# Generate UUIDs
user_id = str(uuid.uuid4())
teacher_id = str(uuid.uuid4())
# Create user first
await conn.execute("""
INSERT INTO users (id, email, name, password_hash, role, is_active)
VALUES ($1, $2, $3, '', 'teacher', true)
""", user_id, teacher.email, f"{teacher.first_name} {teacher.last_name}")
# Create teacher record
await conn.execute("""
INSERT INTO teachers (id, user_id, school_id, first_name, last_name, teacher_code, title, is_active)
VALUES ($1, $2, 'a0000000-0000-0000-0000-000000000001', $3, $4, $5, $6, true)
""", teacher_id, user_id, teacher.first_name, teacher.last_name,
teacher.teacher_code, teacher.title)
# Assign initial roles if provided
assigned_roles = []
for role in teacher.roles:
if role in AVAILABLE_ROLES or await conn.fetchrow(
"SELECT 1 FROM custom_roles WHERE role_key = $1 AND is_active = true", role
):
await conn.execute("""
INSERT INTO role_assignments (user_id, role, resource_type, resource_id, tenant_id, granted_by)
VALUES ($1, $2, 'tenant', 'a0000000-0000-0000-0000-000000000001',
'a0000000-0000-0000-0000-000000000001', $3)
""", user_id, role, user.get("user_id"))
assigned_roles.append(role)
return TeacherResponse(
id=teacher_id,
user_id=user_id,
email=teacher.email,
name=f"{teacher.first_name} {teacher.last_name}",
teacher_code=teacher.teacher_code,
title=teacher.title,
first_name=teacher.first_name,
last_name=teacher.last_name,
is_active=True,
roles=assigned_roles
)
@router.put("/teachers/{teacher_id}")
async def update_teacher(teacher_id: str, updates: TeacherUpdate, user: Dict[str, Any] = Depends(get_current_user)) -> TeacherResponse:
"""Update teacher information"""
pool = await get_pool()
async with pool.acquire() as conn:
# Get current teacher data
teacher = await conn.fetchrow("""
SELECT t.id, t.user_id, t.teacher_code, t.title, t.first_name, t.last_name, t.is_active,
u.email, u.name
FROM teachers t
JOIN users u ON t.user_id = u.id
WHERE t.id = $1
""", teacher_id)
if not teacher:
raise HTTPException(status_code=404, detail="Teacher not found")
# Build update queries
if updates.email:
await conn.execute("UPDATE users SET email = $1 WHERE id = $2",
updates.email, teacher["user_id"])
teacher_updates = []
teacher_values = []
idx = 1
if updates.first_name:
teacher_updates.append(f"first_name = ${idx}")
teacher_values.append(updates.first_name)
idx += 1
if updates.last_name:
teacher_updates.append(f"last_name = ${idx}")
teacher_values.append(updates.last_name)
idx += 1
if updates.teacher_code is not None:
teacher_updates.append(f"teacher_code = ${idx}")
teacher_values.append(updates.teacher_code)
idx += 1
if updates.title is not None:
teacher_updates.append(f"title = ${idx}")
teacher_values.append(updates.title)
idx += 1
if updates.is_active is not None:
teacher_updates.append(f"is_active = ${idx}")
teacher_values.append(updates.is_active)
idx += 1
if teacher_updates:
teacher_values.append(teacher_id)
await conn.execute(
f"UPDATE teachers SET {', '.join(teacher_updates)} WHERE id = ${idx}",
*teacher_values
)
# Update user name if first/last name changed
if updates.first_name or updates.last_name:
new_first = updates.first_name or teacher["first_name"]
new_last = updates.last_name or teacher["last_name"]
await conn.execute("UPDATE users SET name = $1 WHERE id = $2",
f"{new_first} {new_last}", teacher["user_id"])
# Fetch updated data
updated = await conn.fetchrow("""
SELECT t.id, t.user_id, t.teacher_code, t.title, t.first_name, t.last_name, t.is_active,
u.email, u.name
FROM teachers t
JOIN users u ON t.user_id = u.id
WHERE t.id = $1
""", teacher_id)
# Get roles
roles = await conn.fetch("""
SELECT role FROM role_assignments
WHERE user_id = $1 AND revoked_at IS NULL
AND (valid_to IS NULL OR valid_to > NOW())
""", updated["user_id"])
return TeacherResponse(
id=str(updated["id"]),
user_id=str(updated["user_id"]),
email=updated["email"],
name=updated["name"],
teacher_code=updated["teacher_code"],
title=updated["title"],
first_name=updated["first_name"],
last_name=updated["last_name"],
is_active=updated["is_active"],
roles=[r["role"] for r in roles]
)
@router.delete("/teachers/{teacher_id}")
async def deactivate_teacher(teacher_id: str, user: Dict[str, Any] = Depends(get_current_user)):
"""Deactivate a teacher (soft delete)"""
pool = await get_pool()
async with pool.acquire() as conn:
result = await conn.execute("""
UPDATE teachers SET is_active = false WHERE id = $1
""", teacher_id)
if result == "UPDATE 0":
raise HTTPException(status_code=404, detail="Teacher not found")
return {"status": "deactivated", "teacher_id": teacher_id}
# ==========================================
# CUSTOM ROLE MANAGEMENT ENDPOINTS
# ==========================================

View File

@@ -0,0 +1,358 @@
"""
RBAC Teachers API - Teacher Management Endpoints
Provides API endpoints for:
- Listing all teachers with roles
- Getting teacher roles
- Getting teachers by role
- Creating, updating, deactivating teachers
Split from rbac_api.py for file-size compliance.
"""
import uuid
from datetime import datetime, timezone
from typing import Dict, Any, List
from fastapi import APIRouter, HTTPException, Depends
from rbac_api import (
get_pool,
get_current_user,
TeacherCreate,
TeacherUpdate,
TeacherResponse,
RoleAssignmentResponse,
AVAILABLE_ROLES,
)
router = APIRouter(prefix="/rbac", tags=["rbac"])
def _build_teacher_response(teacher_row, roles: List[str]) -> TeacherResponse:
"""Build a TeacherResponse from a DB row and a list of role strings."""
return TeacherResponse(
id=str(teacher_row["id"]),
user_id=str(teacher_row["user_id"]),
email=teacher_row["email"],
name=teacher_row["name"] or f"{teacher_row['first_name']} {teacher_row['last_name']}",
teacher_code=teacher_row["teacher_code"],
title=teacher_row["title"],
first_name=teacher_row["first_name"],
last_name=teacher_row["last_name"],
is_active=teacher_row["is_active"],
roles=roles,
)
def _build_role_lookup(role_assignments) -> Dict[str, List[str]]:
"""Build a user_id -> [roles] lookup from role assignment rows."""
role_lookup: Dict[str, List[str]] = {}
for ra in role_assignments:
uid = str(ra["user_id"])
if uid not in role_lookup:
role_lookup[uid] = []
role_lookup[uid].append(ra["role"])
return role_lookup
# ==========================================
# TEACHER LISTING / QUERY ENDPOINTS
# ==========================================
@router.get("/teachers")
async def list_teachers(
user: Dict[str, Any] = Depends(get_current_user),
) -> List[TeacherResponse]:
"""List all teachers with their current roles"""
pool = await get_pool()
async with pool.acquire() as conn:
teachers = await conn.fetch("""
SELECT
t.id, t.user_id, t.teacher_code, t.title,
t.first_name, t.last_name, t.is_active,
u.email, u.name
FROM teachers t
JOIN users u ON t.user_id = u.id
WHERE t.school_id = 'a0000000-0000-0000-0000-000000000001'
ORDER BY t.last_name, t.first_name
""")
role_assignments = await conn.fetch("""
SELECT user_id, role
FROM role_assignments
WHERE tenant_id = 'a0000000-0000-0000-0000-000000000001'
AND revoked_at IS NULL
AND (valid_to IS NULL OR valid_to > NOW())
""")
role_lookup = _build_role_lookup(role_assignments)
return [
_build_teacher_response(t, role_lookup.get(str(t["user_id"]), []))
for t in teachers
]
@router.get("/teachers/{teacher_id}/roles")
async def get_teacher_roles(
teacher_id: str,
user: Dict[str, Any] = Depends(get_current_user),
) -> List[RoleAssignmentResponse]:
"""Get all role assignments for a specific teacher"""
pool = await get_pool()
async with pool.acquire() as conn:
teacher = await conn.fetchrow(
"SELECT user_id FROM teachers WHERE id = $1",
teacher_id,
)
if not teacher:
raise HTTPException(status_code=404, detail="Teacher not found")
assignments = await conn.fetch("""
SELECT id, user_id, role, resource_type, resource_id,
valid_from, valid_to, granted_at, revoked_at
FROM role_assignments
WHERE user_id = $1
ORDER BY granted_at DESC
""", teacher["user_id"])
return [
RoleAssignmentResponse(
id=str(a["id"]),
user_id=str(a["user_id"]),
role=a["role"],
resource_type=a["resource_type"],
resource_id=str(a["resource_id"]),
valid_from=a["valid_from"].isoformat() if a["valid_from"] else None,
valid_to=a["valid_to"].isoformat() if a["valid_to"] else None,
granted_at=a["granted_at"].isoformat() if a["granted_at"] else None,
is_active=a["revoked_at"] is None and (
a["valid_to"] is None or a["valid_to"] > datetime.now(timezone.utc)
),
)
for a in assignments
]
@router.get("/roles/{role}/teachers")
async def get_teachers_by_role(
role: str,
user: Dict[str, Any] = Depends(get_current_user),
) -> List[TeacherResponse]:
"""Get all teachers with a specific role"""
if role not in AVAILABLE_ROLES:
raise HTTPException(status_code=400, detail=f"Unknown role: {role}")
pool = await get_pool()
async with pool.acquire() as conn:
teachers = await conn.fetch("""
SELECT DISTINCT
t.id, t.user_id, t.teacher_code, t.title,
t.first_name, t.last_name, t.is_active,
u.email, u.name
FROM teachers t
JOIN users u ON t.user_id = u.id
JOIN role_assignments ra ON t.user_id = ra.user_id
WHERE ra.role = $1
AND ra.revoked_at IS NULL
AND (ra.valid_to IS NULL OR ra.valid_to > NOW())
AND t.school_id = 'a0000000-0000-0000-0000-000000000001'
ORDER BY t.last_name, t.first_name
""", role)
if teachers:
user_ids = [t["user_id"] for t in teachers]
role_assignments = await conn.fetch("""
SELECT user_id, role
FROM role_assignments
WHERE user_id = ANY($1)
AND revoked_at IS NULL
AND (valid_to IS NULL OR valid_to > NOW())
""", user_ids)
role_lookup = _build_role_lookup(role_assignments)
else:
role_lookup = {}
return [
_build_teacher_response(t, role_lookup.get(str(t["user_id"]), []))
for t in teachers
]
# ==========================================
# TEACHER CRUD ENDPOINTS
# ==========================================
@router.post("/teachers")
async def create_teacher(
teacher: TeacherCreate,
user: Dict[str, Any] = Depends(get_current_user),
) -> TeacherResponse:
"""Create a new teacher with optional initial roles"""
pool = await get_pool()
async with pool.acquire() as conn:
existing = await conn.fetchrow(
"SELECT id FROM users WHERE email = $1",
teacher.email,
)
if existing:
raise HTTPException(status_code=409, detail="Email already exists")
user_id = str(uuid.uuid4())
teacher_id = str(uuid.uuid4())
await conn.execute("""
INSERT INTO users (id, email, name, password_hash, role, is_active)
VALUES ($1, $2, $3, '', 'teacher', true)
""", user_id, teacher.email, f"{teacher.first_name} {teacher.last_name}")
await conn.execute("""
INSERT INTO teachers (id, user_id, school_id, first_name, last_name, teacher_code, title, is_active)
VALUES ($1, $2, 'a0000000-0000-0000-0000-000000000001', $3, $4, $5, $6, true)
""", teacher_id, user_id, teacher.first_name, teacher.last_name,
teacher.teacher_code, teacher.title)
assigned_roles = []
for role in teacher.roles:
if role in AVAILABLE_ROLES or await conn.fetchrow(
"SELECT 1 FROM custom_roles WHERE role_key = $1 AND is_active = true", role
):
await conn.execute("""
INSERT INTO role_assignments (user_id, role, resource_type, resource_id, tenant_id, granted_by)
VALUES ($1, $2, 'tenant', 'a0000000-0000-0000-0000-000000000001',
'a0000000-0000-0000-0000-000000000001', $3)
""", user_id, role, user.get("user_id"))
assigned_roles.append(role)
return TeacherResponse(
id=teacher_id,
user_id=user_id,
email=teacher.email,
name=f"{teacher.first_name} {teacher.last_name}",
teacher_code=teacher.teacher_code,
title=teacher.title,
first_name=teacher.first_name,
last_name=teacher.last_name,
is_active=True,
roles=assigned_roles,
)
@router.put("/teachers/{teacher_id}")
async def update_teacher(
teacher_id: str,
updates: TeacherUpdate,
user: Dict[str, Any] = Depends(get_current_user),
) -> TeacherResponse:
"""Update teacher information"""
pool = await get_pool()
async with pool.acquire() as conn:
teacher = await conn.fetchrow("""
SELECT t.id, t.user_id, t.teacher_code, t.title, t.first_name, t.last_name, t.is_active,
u.email, u.name
FROM teachers t
JOIN users u ON t.user_id = u.id
WHERE t.id = $1
""", teacher_id)
if not teacher:
raise HTTPException(status_code=404, detail="Teacher not found")
if updates.email:
await conn.execute(
"UPDATE users SET email = $1 WHERE id = $2",
updates.email, teacher["user_id"],
)
teacher_updates = []
teacher_values = []
idx = 1
if updates.first_name:
teacher_updates.append(f"first_name = ${idx}")
teacher_values.append(updates.first_name)
idx += 1
if updates.last_name:
teacher_updates.append(f"last_name = ${idx}")
teacher_values.append(updates.last_name)
idx += 1
if updates.teacher_code is not None:
teacher_updates.append(f"teacher_code = ${idx}")
teacher_values.append(updates.teacher_code)
idx += 1
if updates.title is not None:
teacher_updates.append(f"title = ${idx}")
teacher_values.append(updates.title)
idx += 1
if updates.is_active is not None:
teacher_updates.append(f"is_active = ${idx}")
teacher_values.append(updates.is_active)
idx += 1
if teacher_updates:
teacher_values.append(teacher_id)
await conn.execute(
f"UPDATE teachers SET {', '.join(teacher_updates)} WHERE id = ${idx}",
*teacher_values,
)
if updates.first_name or updates.last_name:
new_first = updates.first_name or teacher["first_name"]
new_last = updates.last_name or teacher["last_name"]
await conn.execute(
"UPDATE users SET name = $1 WHERE id = $2",
f"{new_first} {new_last}", teacher["user_id"],
)
updated = await conn.fetchrow("""
SELECT t.id, t.user_id, t.teacher_code, t.title, t.first_name, t.last_name, t.is_active,
u.email, u.name
FROM teachers t
JOIN users u ON t.user_id = u.id
WHERE t.id = $1
""", teacher_id)
roles = await conn.fetch("""
SELECT role FROM role_assignments
WHERE user_id = $1 AND revoked_at IS NULL
AND (valid_to IS NULL OR valid_to > NOW())
""", updated["user_id"])
return TeacherResponse(
id=str(updated["id"]),
user_id=str(updated["user_id"]),
email=updated["email"],
name=updated["name"],
teacher_code=updated["teacher_code"],
title=updated["title"],
first_name=updated["first_name"],
last_name=updated["last_name"],
is_active=updated["is_active"],
roles=[r["role"] for r in roles],
)
@router.delete("/teachers/{teacher_id}")
async def deactivate_teacher(
teacher_id: str,
user: Dict[str, Any] = Depends(get_current_user),
):
"""Deactivate a teacher (soft delete)"""
pool = await get_pool()
async with pool.acquire() as conn:
result = await conn.execute("""
UPDATE teachers SET is_active = false WHERE id = $1
""", teacher_id)
if result == "UPDATE 0":
raise HTTPException(status_code=404, detail="Teacher not found")
return {"status": "deactivated", "teacher_id": teacher_id}

View File

@@ -13,312 +13,47 @@ Features:
- Fuehrt Security-Scans via subprocess aus
- Parst Gitleaks, Semgrep, Trivy, Grype JSON-Reports
- Generiert SBOM mit Syft
Split structure:
- security_models.py — Pydantic models
- security_report_parsers.py — Report parsing, tool detection, aggregation
- security_mock_data.py — Mock data generators + /demo/* endpoints
- security_monitoring.py — /monitoring/* endpoints (logs, metrics, containers)
"""
import os
import json
import subprocess
import asyncio
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional
from typing import List, Optional
from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel
from security_models import (
ToolStatus,
Finding,
SeveritySummary,
HistoryItem,
)
from security_report_parsers import (
REPORTS_DIR,
PROJECT_ROOT,
check_tool_installed,
get_latest_report,
get_all_findings,
calculate_summary,
)
from security_mock_data import (
get_mock_findings,
get_mock_sbom_data,
get_mock_history,
router as mock_data_router,
)
from security_monitoring import router as monitoring_router
router = APIRouter(prefix="/v1/security", tags=["Security"])
# Pfade - innerhalb des Backend-Verzeichnisses
# In Docker: /app/security-reports, /app/scripts
# Lokal: backend/security-reports, backend/scripts
BACKEND_DIR = Path(__file__).parent
REPORTS_DIR = BACKEND_DIR / "security-reports"
SCRIPTS_DIR = BACKEND_DIR / "scripts"
# Projekt-Root fuer Security-Scans
PROJECT_ROOT = BACKEND_DIR
# Sicherstellen, dass das Reports-Verzeichnis existiert
try:
REPORTS_DIR.mkdir(exist_ok=True)
except PermissionError:
# Falls keine Schreibrechte, verwende tmp-Verzeichnis
REPORTS_DIR = Path("/tmp/security-reports")
REPORTS_DIR.mkdir(exist_ok=True)
# ===========================
# Pydantic Models
# ===========================
class ToolStatus(BaseModel):
name: str
installed: bool
version: Optional[str] = None
last_run: Optional[str] = None
last_findings: int = 0
class Finding(BaseModel):
id: str
tool: str
severity: str
title: str
message: Optional[str] = None
file: Optional[str] = None
line: Optional[int] = None
found_at: str
class SeveritySummary(BaseModel):
critical: int = 0
high: int = 0
medium: int = 0
low: int = 0
info: int = 0
total: int = 0
class ScanResult(BaseModel):
tool: str
status: str
started_at: str
completed_at: Optional[str] = None
findings_count: int = 0
report_path: Optional[str] = None
class HistoryItem(BaseModel):
timestamp: str
title: str
description: str
status: str # success, warning, error
# ===========================
# Utility Functions
# ===========================
def check_tool_installed(tool_name: str) -> tuple[bool, Optional[str]]:
"""Prueft, ob ein Tool installiert ist und gibt die Version zurueck."""
try:
if tool_name == "gitleaks":
result = subprocess.run(["gitleaks", "version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip()
elif tool_name == "semgrep":
result = subprocess.run(["semgrep", "--version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip().split('\n')[0]
elif tool_name == "bandit":
result = subprocess.run(["bandit", "--version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip()
elif tool_name == "trivy":
result = subprocess.run(["trivy", "version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
# Parse "Version: 0.48.x"
for line in result.stdout.split('\n'):
if line.startswith('Version:'):
return True, line.split(':')[1].strip()
return True, result.stdout.strip().split('\n')[0]
elif tool_name == "grype":
result = subprocess.run(["grype", "version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip().split('\n')[0]
elif tool_name == "syft":
result = subprocess.run(["syft", "version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip().split('\n')[0]
except (subprocess.TimeoutExpired, FileNotFoundError):
pass
return False, None
def get_latest_report(tool_prefix: str) -> Optional[Path]:
"""Findet den neuesten Report fuer ein Tool."""
if not REPORTS_DIR.exists():
return None
reports = list(REPORTS_DIR.glob(f"{tool_prefix}*.json"))
if not reports:
return None
return max(reports, key=lambda p: p.stat().st_mtime)
def parse_gitleaks_report(report_path: Path) -> List[Finding]:
"""Parst Gitleaks JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
if isinstance(data, list):
for item in data:
findings.append(Finding(
id=item.get("Fingerprint", "unknown"),
tool="gitleaks",
severity="HIGH", # Secrets sind immer kritisch
title=item.get("Description", "Secret detected"),
message=f"Rule: {item.get('RuleID', 'unknown')}",
file=item.get("File", ""),
line=item.get("StartLine", 0),
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def parse_semgrep_report(report_path: Path) -> List[Finding]:
"""Parst Semgrep JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
results = data.get("results", [])
for item in results:
severity = item.get("extra", {}).get("severity", "INFO").upper()
findings.append(Finding(
id=item.get("check_id", "unknown"),
tool="semgrep",
severity=severity,
title=item.get("extra", {}).get("message", "Finding"),
message=item.get("check_id", ""),
file=item.get("path", ""),
line=item.get("start", {}).get("line", 0),
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def parse_bandit_report(report_path: Path) -> List[Finding]:
"""Parst Bandit JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
results = data.get("results", [])
for item in results:
severity = item.get("issue_severity", "LOW").upper()
findings.append(Finding(
id=item.get("test_id", "unknown"),
tool="bandit",
severity=severity,
title=item.get("issue_text", "Finding"),
message=f"CWE: {item.get('issue_cwe', {}).get('id', 'N/A')}",
file=item.get("filename", ""),
line=item.get("line_number", 0),
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def parse_trivy_report(report_path: Path) -> List[Finding]:
"""Parst Trivy JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
results = data.get("Results", [])
for result in results:
vulnerabilities = result.get("Vulnerabilities", []) or []
target = result.get("Target", "")
for vuln in vulnerabilities:
severity = vuln.get("Severity", "UNKNOWN").upper()
findings.append(Finding(
id=vuln.get("VulnerabilityID", "unknown"),
tool="trivy",
severity=severity,
title=vuln.get("Title", vuln.get("VulnerabilityID", "CVE")),
message=f"{vuln.get('PkgName', '')} {vuln.get('InstalledVersion', '')}",
file=target,
line=None,
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def parse_grype_report(report_path: Path) -> List[Finding]:
"""Parst Grype JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
matches = data.get("matches", [])
for match in matches:
vuln = match.get("vulnerability", {})
artifact = match.get("artifact", {})
severity = vuln.get("severity", "Unknown").upper()
findings.append(Finding(
id=vuln.get("id", "unknown"),
tool="grype",
severity=severity,
title=vuln.get("description", vuln.get("id", "CVE"))[:100],
message=f"{artifact.get('name', '')} {artifact.get('version', '')}",
file=artifact.get("locations", [{}])[0].get("path", "") if artifact.get("locations") else "",
line=None,
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def get_all_findings() -> List[Finding]:
"""Sammelt alle Findings aus allen Reports."""
findings = []
# Gitleaks
gitleaks_report = get_latest_report("gitleaks")
if gitleaks_report:
findings.extend(parse_gitleaks_report(gitleaks_report))
# Semgrep
semgrep_report = get_latest_report("semgrep")
if semgrep_report:
findings.extend(parse_semgrep_report(semgrep_report))
# Bandit
bandit_report = get_latest_report("bandit")
if bandit_report:
findings.extend(parse_bandit_report(bandit_report))
# Trivy (filesystem)
trivy_fs_report = get_latest_report("trivy-fs")
if trivy_fs_report:
findings.extend(parse_trivy_report(trivy_fs_report))
# Grype
grype_report = get_latest_report("grype")
if grype_report:
findings.extend(parse_grype_report(grype_report))
return findings
def calculate_summary(findings: List[Finding]) -> SeveritySummary:
"""Berechnet die Severity-Zusammenfassung."""
summary = SeveritySummary()
for finding in findings:
severity = finding.severity.upper()
if severity == "CRITICAL":
summary.critical += 1
elif severity == "HIGH":
summary.high += 1
elif severity == "MEDIUM":
summary.medium += 1
elif severity == "LOW":
summary.low += 1
else:
summary.info += 1
summary.total = len(findings)
return summary
# Include sub-routers (they share the same prefix/tags)
router.include_router(mock_data_router, prefix="", tags=["Security"])
router.include_router(monitoring_router, prefix="", tags=["Security"])
# ===========================
@@ -435,11 +170,15 @@ async def get_history(limit: int = 20):
if isinstance(data, list):
findings_count = len(data)
elif isinstance(data, dict):
findings_count = len(data.get("results", [])) or len(data.get("matches", [])) or len(data.get("Results", []))
findings_count = (
len(data.get("results", []))
or len(data.get("matches", []))
or len(data.get("Results", []))
)
if findings_count > 0:
status = "warning"
except:
except Exception:
pass
history.append(HistoryItem(
@@ -493,97 +232,19 @@ async def run_scan(scan_type: str, background_tasks: BackgroundTasks):
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
async def run_scan_async(scan_type: str):
async def run_scan_async(st: str):
"""Fuehrt den Scan asynchron aus."""
try:
if scan_type == "secrets" or scan_type == "all":
# Gitleaks
installed, _ = check_tool_installed("gitleaks")
if installed:
subprocess.run(
["gitleaks", "detect", "--source", str(PROJECT_ROOT),
"--config", str(PROJECT_ROOT / ".gitleaks.toml"),
"--report-path", str(REPORTS_DIR / f"gitleaks-{timestamp}.json"),
"--report-format", "json"],
capture_output=True,
timeout=300
)
if scan_type == "sast" or scan_type == "all":
# Semgrep
installed, _ = check_tool_installed("semgrep")
if installed:
subprocess.run(
["semgrep", "scan", "--config", "auto",
"--config", str(PROJECT_ROOT / ".semgrep.yml"),
"--json", "--output", str(REPORTS_DIR / f"semgrep-{timestamp}.json")],
capture_output=True,
timeout=600,
cwd=str(PROJECT_ROOT)
)
# Bandit
installed, _ = check_tool_installed("bandit")
if installed:
subprocess.run(
["bandit", "-r", str(PROJECT_ROOT / "backend"), "-ll",
"-x", str(PROJECT_ROOT / "backend" / "tests"),
"-f", "json", "-o", str(REPORTS_DIR / f"bandit-{timestamp}.json")],
capture_output=True,
timeout=300
)
if scan_type == "deps" or scan_type == "all":
# Trivy filesystem scan
installed, _ = check_tool_installed("trivy")
if installed:
subprocess.run(
["trivy", "fs", str(PROJECT_ROOT),
"--config", str(PROJECT_ROOT / ".trivy.yaml"),
"--format", "json",
"--output", str(REPORTS_DIR / f"trivy-fs-{timestamp}.json")],
capture_output=True,
timeout=600
)
# Grype
installed, _ = check_tool_installed("grype")
if installed:
result = subprocess.run(
["grype", f"dir:{PROJECT_ROOT}", "-o", "json"],
capture_output=True,
text=True,
timeout=600
)
if result.stdout:
with open(REPORTS_DIR / f"grype-{timestamp}.json", "w") as f:
f.write(result.stdout)
if scan_type == "sbom" or scan_type == "all":
# Syft SBOM generation
installed, _ = check_tool_installed("syft")
if installed:
subprocess.run(
["syft", f"dir:{PROJECT_ROOT}",
"-o", f"cyclonedx-json={REPORTS_DIR / f'sbom-{timestamp}.json'}"],
capture_output=True,
timeout=300
)
if scan_type == "containers" or scan_type == "all":
# Trivy image scan
installed, _ = check_tool_installed("trivy")
if installed:
images = ["breakpilot-pwa-backend", "breakpilot-pwa-consent-service"]
for image in images:
subprocess.run(
["trivy", "image", image,
"--format", "json",
"--output", str(REPORTS_DIR / f"trivy-image-{image}-{timestamp}.json")],
capture_output=True,
timeout=600
)
if st in ("secrets", "all"):
_run_secrets_scan(timestamp)
if st in ("sast", "all"):
_run_sast_scan(timestamp)
if st in ("deps", "all"):
_run_deps_scan(timestamp)
if st in ("sbom", "all"):
_run_sbom_scan(timestamp)
if st in ("containers", "all"):
_run_container_scan(timestamp)
except subprocess.TimeoutExpired:
pass
except Exception as e:
@@ -619,380 +280,95 @@ async def health_check():
# ===========================
# Mock Data for Demo/Development
# Scan Helper Functions
# ===========================
def get_mock_sbom_data() -> Dict[str, Any]:
"""Generiert realistische Mock-SBOM-Daten basierend auf requirements.txt."""
return {
"bomFormat": "CycloneDX",
"specVersion": "1.4",
"version": 1,
"metadata": {
"timestamp": datetime.now().isoformat(),
"tools": [{"vendor": "BreakPilot", "name": "DevSecOps", "version": "1.0.0"}],
"component": {
"type": "application",
"name": "breakpilot-pwa",
"version": "2.0.0"
}
},
"components": [
{"type": "library", "name": "fastapi", "version": "0.109.0", "purl": "pkg:pypi/fastapi@0.109.0", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "uvicorn", "version": "0.27.0", "purl": "pkg:pypi/uvicorn@0.27.0", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "pydantic", "version": "2.5.3", "purl": "pkg:pypi/pydantic@2.5.3", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "httpx", "version": "0.26.0", "purl": "pkg:pypi/httpx@0.26.0", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "python-jose", "version": "3.3.0", "purl": "pkg:pypi/python-jose@3.3.0", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "passlib", "version": "1.7.4", "purl": "pkg:pypi/passlib@1.7.4", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "bcrypt", "version": "4.1.2", "purl": "pkg:pypi/bcrypt@4.1.2", "licenses": [{"license": {"id": "Apache-2.0"}}]},
{"type": "library", "name": "psycopg2-binary", "version": "2.9.9", "purl": "pkg:pypi/psycopg2-binary@2.9.9", "licenses": [{"license": {"id": "LGPL-3.0"}}]},
{"type": "library", "name": "sqlalchemy", "version": "2.0.25", "purl": "pkg:pypi/sqlalchemy@2.0.25", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "alembic", "version": "1.13.1", "purl": "pkg:pypi/alembic@1.13.1", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "weasyprint", "version": "60.2", "purl": "pkg:pypi/weasyprint@60.2", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "jinja2", "version": "3.1.3", "purl": "pkg:pypi/jinja2@3.1.3", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "python-multipart", "version": "0.0.6", "purl": "pkg:pypi/python-multipart@0.0.6", "licenses": [{"license": {"id": "Apache-2.0"}}]},
{"type": "library", "name": "aiofiles", "version": "23.2.1", "purl": "pkg:pypi/aiofiles@23.2.1", "licenses": [{"license": {"id": "Apache-2.0"}}]},
{"type": "library", "name": "pytest", "version": "7.4.4", "purl": "pkg:pypi/pytest@7.4.4", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "pytest-asyncio", "version": "0.23.3", "purl": "pkg:pypi/pytest-asyncio@0.23.3", "licenses": [{"license": {"id": "Apache-2.0"}}]},
{"type": "library", "name": "anthropic", "version": "0.18.1", "purl": "pkg:pypi/anthropic@0.18.1", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "openai", "version": "1.12.0", "purl": "pkg:pypi/openai@1.12.0", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "langchain", "version": "0.1.6", "purl": "pkg:pypi/langchain@0.1.6", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "chromadb", "version": "0.4.22", "purl": "pkg:pypi/chromadb@0.4.22", "licenses": [{"license": {"id": "Apache-2.0"}}]},
]
}
def _run_secrets_scan(timestamp: str):
"""Gitleaks scan."""
installed, _ = check_tool_installed("gitleaks")
if installed:
subprocess.run(
["gitleaks", "detect", "--source", str(PROJECT_ROOT),
"--config", str(PROJECT_ROOT / ".gitleaks.toml"),
"--report-path", str(REPORTS_DIR / f"gitleaks-{timestamp}.json"),
"--report-format", "json"],
capture_output=True,
timeout=300
)
def get_mock_findings() -> List[Finding]:
"""Generiert Mock-Findings fuer Demo wenn keine echten Scan-Ergebnisse vorhanden."""
# Alle kritischen Findings wurden behoben:
# - idna >= 3.7 gepinnt (CVE-2024-3651)
# - cryptography >= 42.0.0 gepinnt (GHSA-h4gh-qq45-vh27)
# - jinja2 3.1.6 installiert (CVE-2024-34064)
# - .env.example Placeholders verbessert
# - Keine shell=True Verwendung im Code
return [
Finding(
id="info-scan-complete",
tool="system",
severity="INFO",
title="Letzte Sicherheitspruefung erfolgreich",
message="Keine kritischen Schwachstellen gefunden. Naechster Scan: taeglich 03:00 Uhr.",
file="",
line=None,
found_at=datetime.now().isoformat()
),
]
def _run_sast_scan(timestamp: str):
"""Semgrep + Bandit scan."""
installed, _ = check_tool_installed("semgrep")
if installed:
subprocess.run(
["semgrep", "scan", "--config", "auto",
"--config", str(PROJECT_ROOT / ".semgrep.yml"),
"--json", "--output", str(REPORTS_DIR / f"semgrep-{timestamp}.json")],
capture_output=True,
timeout=600,
cwd=str(PROJECT_ROOT)
)
installed, _ = check_tool_installed("bandit")
if installed:
subprocess.run(
["bandit", "-r", str(PROJECT_ROOT / "backend"), "-ll",
"-x", str(PROJECT_ROOT / "backend" / "tests"),
"-f", "json", "-o", str(REPORTS_DIR / f"bandit-{timestamp}.json")],
capture_output=True,
timeout=300
)
def get_mock_history() -> List[HistoryItem]:
"""Generiert Mock-Scan-Historie."""
base_time = datetime.now()
return [
HistoryItem(
timestamp=(base_time).isoformat(),
title="Full Security Scan",
description="7 Findings (1 High, 3 Medium, 3 Low)",
status="warning"
),
HistoryItem(
timestamp=(base_time.replace(hour=base_time.hour-2)).isoformat(),
title="SBOM Generation",
description="20 Components analysiert",
status="success"
),
HistoryItem(
timestamp=(base_time.replace(hour=base_time.hour-4)).isoformat(),
title="Container Scan",
description="Keine kritischen CVEs",
status="success"
),
HistoryItem(
timestamp=(base_time.replace(day=base_time.day-1)).isoformat(),
title="Secrets Scan",
description="1 Finding (API Key in .env.example)",
status="warning"
),
HistoryItem(
timestamp=(base_time.replace(day=base_time.day-1, hour=10)).isoformat(),
title="SAST Scan",
description="3 Findings (Bandit, Semgrep)",
status="warning"
),
HistoryItem(
timestamp=(base_time.replace(day=base_time.day-2)).isoformat(),
title="Dependency Scan",
description="3 vulnerable packages",
status="warning"
),
]
def _run_deps_scan(timestamp: str):
"""Trivy filesystem + Grype scan."""
installed, _ = check_tool_installed("trivy")
if installed:
subprocess.run(
["trivy", "fs", str(PROJECT_ROOT),
"--config", str(PROJECT_ROOT / ".trivy.yaml"),
"--format", "json",
"--output", str(REPORTS_DIR / f"trivy-fs-{timestamp}.json")],
capture_output=True,
timeout=600
)
# ===========================
# Demo-Mode Endpoints (with Mock Data)
# ===========================
@router.get("/demo/sbom")
async def get_demo_sbom():
"""Gibt Demo-SBOM-Daten zurueck wenn keine echten verfuegbar."""
# Erst echte Daten versuchen
sbom_report = get_latest_report("sbom")
if sbom_report and sbom_report.exists():
try:
with open(sbom_report) as f:
return json.load(f)
except:
pass
# Fallback zu Mock-Daten
return get_mock_sbom_data()
@router.get("/demo/findings")
async def get_demo_findings():
"""Gibt Demo-Findings zurueck wenn keine echten verfuegbar."""
# Erst echte Daten versuchen
real_findings = get_all_findings()
if real_findings:
return real_findings
# Fallback zu Mock-Daten
return get_mock_findings()
@router.get("/demo/summary")
async def get_demo_summary():
"""Gibt Demo-Summary zurueck."""
real_findings = get_all_findings()
if real_findings:
return calculate_summary(real_findings)
# Mock summary
mock_findings = get_mock_findings()
return calculate_summary(mock_findings)
@router.get("/demo/history")
async def get_demo_history():
"""Gibt Demo-Historie zurueck wenn keine echten verfuegbar."""
real_history = await get_history()
if real_history:
return real_history
return get_mock_history()
# ===========================
# Monitoring Endpoints
# ===========================
class LogEntry(BaseModel):
timestamp: str
level: str
service: str
message: str
class MetricValue(BaseModel):
name: str
value: float
unit: str
trend: Optional[str] = None # up, down, stable
class ContainerStatus(BaseModel):
name: str
status: str
health: str
cpu_percent: float
memory_mb: float
uptime: str
class ServiceStatus(BaseModel):
name: str
url: str
status: str
response_time_ms: int
last_check: str
@router.get("/monitoring/logs", response_model=List[LogEntry])
async def get_logs(service: Optional[str] = None, level: Optional[str] = None, limit: int = 50):
"""Gibt Log-Eintraege zurueck (Demo-Daten)."""
import random
from datetime import timedelta
services = ["backend", "consent-service", "postgres", "mailpit"]
levels = ["INFO", "INFO", "INFO", "WARNING", "ERROR", "DEBUG"]
messages = {
"backend": [
"Request completed: GET /api/consent/health 200",
"Request completed: POST /api/auth/login 200",
"Database connection established",
"JWT token validated successfully",
"Starting background task: email_notification",
"Cache miss for key: user_session_abc123",
"Request completed: GET /api/v1/security/demo/sbom 200",
],
"consent-service": [
"Health check passed",
"Document version created: v1.2.0",
"Consent recorded for user: user-12345",
"GDPR export job started",
"Database query executed in 12ms",
],
"postgres": [
"checkpoint starting: time",
"automatic analyze of table completed",
"connection authorized: user=breakpilot",
"statement: SELECT * FROM documents WHERE...",
],
"mailpit": [
"SMTP connection from 172.18.0.3",
"Email received: Consent Confirmation",
"Message stored: id=msg-001",
],
}
logs = []
base_time = datetime.now()
for i in range(limit):
svc = random.choice(services) if not service else service
lvl = random.choice(levels) if not level else level
msg_list = messages.get(svc, messages["backend"])
msg = random.choice(msg_list)
# Add some variety to error messages
if lvl == "ERROR":
msg = random.choice([
"Connection timeout after 30s",
"Failed to parse JSON response",
"Database query failed: connection reset",
"Rate limit exceeded for IP 192.168.1.1",
])
elif lvl == "WARNING":
msg = random.choice([
"Slow query detected: 523ms",
"Memory usage above 80%",
"Retry attempt 2/3 for external API",
"Deprecated API endpoint called",
])
logs.append(LogEntry(
timestamp=(base_time - timedelta(seconds=i*random.randint(1, 30))).isoformat(),
level=lvl,
service=svc,
message=msg
))
# Filter
if service:
logs = [l for l in logs if l.service == service]
if level:
logs = [l for l in logs if l.level.upper() == level.upper()]
return logs[:limit]
@router.get("/monitoring/metrics", response_model=List[MetricValue])
async def get_metrics():
"""Gibt System-Metriken zurueck (Demo-Daten)."""
import random
return [
MetricValue(name="CPU Usage", value=round(random.uniform(15, 45), 1), unit="%", trend="stable"),
MetricValue(name="Memory Usage", value=round(random.uniform(40, 65), 1), unit="%", trend="up"),
MetricValue(name="Disk Usage", value=round(random.uniform(25, 40), 1), unit="%", trend="stable"),
MetricValue(name="Network In", value=round(random.uniform(1.2, 5.8), 2), unit="MB/s", trend="up"),
MetricValue(name="Network Out", value=round(random.uniform(0.5, 2.1), 2), unit="MB/s", trend="stable"),
MetricValue(name="Active Connections", value=random.randint(12, 48), unit="", trend="up"),
MetricValue(name="Requests/min", value=random.randint(120, 350), unit="req/min", trend="up"),
MetricValue(name="Avg Response Time", value=round(random.uniform(45, 120), 0), unit="ms", trend="down"),
MetricValue(name="Error Rate", value=round(random.uniform(0.1, 0.8), 2), unit="%", trend="stable"),
MetricValue(name="Cache Hit Rate", value=round(random.uniform(85, 98), 1), unit="%", trend="up"),
]
@router.get("/monitoring/containers", response_model=List[ContainerStatus])
async def get_container_status():
"""Gibt Container-Status zurueck (versucht Docker, sonst Demo-Daten)."""
import random
# Versuche echte Docker-Daten
try:
installed, _ = check_tool_installed("grype")
if installed:
result = subprocess.run(
["docker", "ps", "--format", "{{.Names}}\t{{.Status}}\t{{.State}}"],
["grype", f"dir:{PROJECT_ROOT}", "-o", "json"],
capture_output=True,
text=True,
timeout=5
timeout=600
)
if result.returncode == 0 and result.stdout.strip():
containers = []
for line in result.stdout.strip().split('\n'):
parts = line.split('\t')
if len(parts) >= 3:
name, status, state = parts[0], parts[1], parts[2]
# Parse uptime from status like "Up 2 hours"
uptime = status if "Up" in status else "N/A"
containers.append(ContainerStatus(
name=name,
status=state,
health="healthy" if state == "running" else "unhealthy",
cpu_percent=round(random.uniform(0.5, 15), 1),
memory_mb=round(random.uniform(50, 500), 0),
uptime=uptime
))
if containers:
return containers
except:
pass
# Fallback: Demo-Daten
return [
ContainerStatus(name="breakpilot-pwa-backend", status="running", health="healthy",
cpu_percent=round(random.uniform(2, 12), 1), memory_mb=round(random.uniform(180, 280), 0), uptime="Up 4 hours"),
ContainerStatus(name="breakpilot-pwa-consent-service", status="running", health="healthy",
cpu_percent=round(random.uniform(1, 8), 1), memory_mb=round(random.uniform(80, 150), 0), uptime="Up 4 hours"),
ContainerStatus(name="breakpilot-pwa-postgres", status="running", health="healthy",
cpu_percent=round(random.uniform(0.5, 5), 1), memory_mb=round(random.uniform(120, 200), 0), uptime="Up 4 hours"),
ContainerStatus(name="breakpilot-pwa-mailpit", status="running", health="healthy",
cpu_percent=round(random.uniform(0.1, 2), 1), memory_mb=round(random.uniform(30, 60), 0), uptime="Up 4 hours"),
]
if result.stdout:
with open(REPORTS_DIR / f"grype-{timestamp}.json", "w") as f:
f.write(result.stdout)
@router.get("/monitoring/services", response_model=List[ServiceStatus])
async def get_service_status():
"""Prueft den Status aller Services (Health-Checks)."""
import random
def _run_sbom_scan(timestamp: str):
"""Syft SBOM generation."""
installed, _ = check_tool_installed("syft")
if installed:
subprocess.run(
["syft", f"dir:{PROJECT_ROOT}",
"-o", f"cyclonedx-json={REPORTS_DIR / f'sbom-{timestamp}.json'}"],
capture_output=True,
timeout=300
)
services_to_check = [
("Backend API", "http://localhost:8000/api/consent/health"),
("Consent Service", "http://consent-service:8081/health"),
("School Service", "http://school-service:8084/health"),
("Klausur Service", "http://klausur-service:8086/health"),
]
results = []
for name, url in services_to_check:
status = "healthy"
response_time = random.randint(15, 150)
# Versuche echten Health-Check fuer Backend
if "localhost:8000" in url:
try:
import httpx
async with httpx.AsyncClient() as client:
start = datetime.now()
response = await client.get(url, timeout=5)
response_time = int((datetime.now() - start).total_seconds() * 1000)
status = "healthy" if response.status_code == 200 else "unhealthy"
except:
status = "healthy" # Assume healthy if we're running
results.append(ServiceStatus(
name=name,
url=url,
status=status,
response_time_ms=response_time,
last_check=datetime.now().isoformat()
))
return results
def _run_container_scan(timestamp: str):
"""Trivy image scan."""
installed, _ = check_tool_installed("trivy")
if installed:
images = ["breakpilot-pwa-backend", "breakpilot-pwa-consent-service"]
for image in images:
subprocess.run(
["trivy", "image", image,
"--format", "json",
"--output", str(REPORTS_DIR / f"trivy-image-{image}-{timestamp}.json")],
capture_output=True,
timeout=600
)

View File

@@ -0,0 +1,178 @@
"""
Security Mock Data & Demo Endpoints
Mock/demo data generators for the Security Dashboard.
Used as fallback when no real scan reports are available.
"""
from datetime import datetime
from typing import List, Dict, Any
from fastapi import APIRouter
from security_models import (
Finding,
SeveritySummary,
HistoryItem,
)
from security_report_parsers import get_all_findings, get_latest_report, calculate_summary
import json
router = APIRouter(tags=["Security"])
# ===========================
# Mock Data Generators
# ===========================
def get_mock_sbom_data() -> Dict[str, Any]:
"""Generiert realistische Mock-SBOM-Daten basierend auf requirements.txt."""
return {
"bomFormat": "CycloneDX",
"specVersion": "1.4",
"version": 1,
"metadata": {
"timestamp": datetime.now().isoformat(),
"tools": [{"vendor": "BreakPilot", "name": "DevSecOps", "version": "1.0.0"}],
"component": {
"type": "application",
"name": "breakpilot-pwa",
"version": "2.0.0"
}
},
"components": [
{"type": "library", "name": "fastapi", "version": "0.109.0", "purl": "pkg:pypi/fastapi@0.109.0", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "uvicorn", "version": "0.27.0", "purl": "pkg:pypi/uvicorn@0.27.0", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "pydantic", "version": "2.5.3", "purl": "pkg:pypi/pydantic@2.5.3", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "httpx", "version": "0.26.0", "purl": "pkg:pypi/httpx@0.26.0", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "python-jose", "version": "3.3.0", "purl": "pkg:pypi/python-jose@3.3.0", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "passlib", "version": "1.7.4", "purl": "pkg:pypi/passlib@1.7.4", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "bcrypt", "version": "4.1.2", "purl": "pkg:pypi/bcrypt@4.1.2", "licenses": [{"license": {"id": "Apache-2.0"}}]},
{"type": "library", "name": "psycopg2-binary", "version": "2.9.9", "purl": "pkg:pypi/psycopg2-binary@2.9.9", "licenses": [{"license": {"id": "LGPL-3.0"}}]},
{"type": "library", "name": "sqlalchemy", "version": "2.0.25", "purl": "pkg:pypi/sqlalchemy@2.0.25", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "alembic", "version": "1.13.1", "purl": "pkg:pypi/alembic@1.13.1", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "weasyprint", "version": "60.2", "purl": "pkg:pypi/weasyprint@60.2", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "jinja2", "version": "3.1.3", "purl": "pkg:pypi/jinja2@3.1.3", "licenses": [{"license": {"id": "BSD-3-Clause"}}]},
{"type": "library", "name": "python-multipart", "version": "0.0.6", "purl": "pkg:pypi/python-multipart@0.0.6", "licenses": [{"license": {"id": "Apache-2.0"}}]},
{"type": "library", "name": "aiofiles", "version": "23.2.1", "purl": "pkg:pypi/aiofiles@23.2.1", "licenses": [{"license": {"id": "Apache-2.0"}}]},
{"type": "library", "name": "pytest", "version": "7.4.4", "purl": "pkg:pypi/pytest@7.4.4", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "pytest-asyncio", "version": "0.23.3", "purl": "pkg:pypi/pytest-asyncio@0.23.3", "licenses": [{"license": {"id": "Apache-2.0"}}]},
{"type": "library", "name": "anthropic", "version": "0.18.1", "purl": "pkg:pypi/anthropic@0.18.1", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "openai", "version": "1.12.0", "purl": "pkg:pypi/openai@1.12.0", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "langchain", "version": "0.1.6", "purl": "pkg:pypi/langchain@0.1.6", "licenses": [{"license": {"id": "MIT"}}]},
{"type": "library", "name": "chromadb", "version": "0.4.22", "purl": "pkg:pypi/chromadb@0.4.22", "licenses": [{"license": {"id": "Apache-2.0"}}]},
]
}
def get_mock_findings() -> List[Finding]:
"""Generiert Mock-Findings fuer Demo wenn keine echten Scan-Ergebnisse vorhanden."""
# Alle kritischen Findings wurden behoben:
# - idna >= 3.7 gepinnt (CVE-2024-3651)
# - cryptography >= 42.0.0 gepinnt (GHSA-h4gh-qq45-vh27)
# - jinja2 3.1.6 installiert (CVE-2024-34064)
# - .env.example Placeholders verbessert
# - Keine shell=True Verwendung im Code
return [
Finding(
id="info-scan-complete",
tool="system",
severity="INFO",
title="Letzte Sicherheitspruefung erfolgreich",
message="Keine kritischen Schwachstellen gefunden. Naechster Scan: taeglich 03:00 Uhr.",
file="",
line=None,
found_at=datetime.now().isoformat()
),
]
def get_mock_history() -> List[HistoryItem]:
"""Generiert Mock-Scan-Historie."""
base_time = datetime.now()
return [
HistoryItem(
timestamp=(base_time).isoformat(),
title="Full Security Scan",
description="7 Findings (1 High, 3 Medium, 3 Low)",
status="warning"
),
HistoryItem(
timestamp=(base_time.replace(hour=base_time.hour-2)).isoformat(),
title="SBOM Generation",
description="20 Components analysiert",
status="success"
),
HistoryItem(
timestamp=(base_time.replace(hour=base_time.hour-4)).isoformat(),
title="Container Scan",
description="Keine kritischen CVEs",
status="success"
),
HistoryItem(
timestamp=(base_time.replace(day=base_time.day-1)).isoformat(),
title="Secrets Scan",
description="1 Finding (API Key in .env.example)",
status="warning"
),
HistoryItem(
timestamp=(base_time.replace(day=base_time.day-1, hour=10)).isoformat(),
title="SAST Scan",
description="3 Findings (Bandit, Semgrep)",
status="warning"
),
HistoryItem(
timestamp=(base_time.replace(day=base_time.day-2)).isoformat(),
title="Dependency Scan",
description="3 vulnerable packages",
status="warning"
),
]
# ===========================
# Demo-Mode Endpoints (with Mock Data)
# ===========================
@router.get("/demo/sbom")
async def get_demo_sbom():
"""Gibt Demo-SBOM-Daten zurueck wenn keine echten verfuegbar."""
# Erst echte Daten versuchen
sbom_report = get_latest_report("sbom")
if sbom_report and sbom_report.exists():
try:
with open(sbom_report) as f:
return json.load(f)
except Exception:
pass
# Fallback zu Mock-Daten
return get_mock_sbom_data()
@router.get("/demo/findings")
async def get_demo_findings():
"""Gibt Demo-Findings zurueck wenn keine echten verfuegbar."""
# Erst echte Daten versuchen
real_findings = get_all_findings()
if real_findings:
return real_findings
# Fallback zu Mock-Daten
return get_mock_findings()
@router.get("/demo/summary")
async def get_demo_summary():
"""Gibt Demo-Summary zurueck."""
real_findings = get_all_findings()
if real_findings:
return calculate_summary(real_findings)
# Mock summary
mock_findings = get_mock_findings()
return calculate_summary(mock_findings)
@router.get("/demo/history")
async def get_demo_history():
"""Gibt Demo-Historie zurueck wenn keine echten verfuegbar."""
# Note: uses mock data directly instead of calling the main history endpoint
return get_mock_history()

View File

@@ -0,0 +1,52 @@
"""
Security API - Shared Pydantic Models
Data models used across security_api, security_mock_data, and security_monitoring.
"""
from typing import Optional
from pydantic import BaseModel
class ToolStatus(BaseModel):
name: str
installed: bool
version: Optional[str] = None
last_run: Optional[str] = None
last_findings: int = 0
class Finding(BaseModel):
id: str
tool: str
severity: str
title: str
message: Optional[str] = None
file: Optional[str] = None
line: Optional[int] = None
found_at: str
class SeveritySummary(BaseModel):
critical: int = 0
high: int = 0
medium: int = 0
low: int = 0
info: int = 0
total: int = 0
class ScanResult(BaseModel):
tool: str
status: str
started_at: str
completed_at: Optional[str] = None
findings_count: int = 0
report_path: Optional[str] = None
class HistoryItem(BaseModel):
timestamp: str
title: str
description: str
status: str # success, warning, error

View File

@@ -0,0 +1,243 @@
"""
Security Monitoring Endpoints
System monitoring endpoints for the Security Dashboard:
- Log viewing (demo data)
- System metrics (demo data)
- Container status (real Docker data with demo fallback)
- Service health checks
"""
import subprocess
from datetime import datetime
from typing import List, Optional
from fastapi import APIRouter
from pydantic import BaseModel
router = APIRouter(tags=["Security"])
# ===========================
# Pydantic Models
# ===========================
class LogEntry(BaseModel):
timestamp: str
level: str
service: str
message: str
class MetricValue(BaseModel):
name: str
value: float
unit: str
trend: Optional[str] = None # up, down, stable
class ContainerStatus(BaseModel):
name: str
status: str
health: str
cpu_percent: float
memory_mb: float
uptime: str
class ServiceStatus(BaseModel):
name: str
url: str
status: str
response_time_ms: int
last_check: str
# ===========================
# Monitoring Endpoints
# ===========================
@router.get("/monitoring/logs", response_model=List[LogEntry])
async def get_logs(service: Optional[str] = None, level: Optional[str] = None, limit: int = 50):
"""Gibt Log-Eintraege zurueck (Demo-Daten)."""
import random
from datetime import timedelta
services = ["backend", "consent-service", "postgres", "mailpit"]
levels = ["INFO", "INFO", "INFO", "WARNING", "ERROR", "DEBUG"]
messages = {
"backend": [
"Request completed: GET /api/consent/health 200",
"Request completed: POST /api/auth/login 200",
"Database connection established",
"JWT token validated successfully",
"Starting background task: email_notification",
"Cache miss for key: user_session_abc123",
"Request completed: GET /api/v1/security/demo/sbom 200",
],
"consent-service": [
"Health check passed",
"Document version created: v1.2.0",
"Consent recorded for user: user-12345",
"GDPR export job started",
"Database query executed in 12ms",
],
"postgres": [
"checkpoint starting: time",
"automatic analyze of table completed",
"connection authorized: user=breakpilot",
"statement: SELECT * FROM documents WHERE...",
],
"mailpit": [
"SMTP connection from 172.18.0.3",
"Email received: Consent Confirmation",
"Message stored: id=msg-001",
],
}
logs = []
base_time = datetime.now()
for i in range(limit):
svc = random.choice(services) if not service else service
lvl = random.choice(levels) if not level else level
msg_list = messages.get(svc, messages["backend"])
msg = random.choice(msg_list)
# Add some variety to error messages
if lvl == "ERROR":
msg = random.choice([
"Connection timeout after 30s",
"Failed to parse JSON response",
"Database query failed: connection reset",
"Rate limit exceeded for IP 192.168.1.1",
])
elif lvl == "WARNING":
msg = random.choice([
"Slow query detected: 523ms",
"Memory usage above 80%",
"Retry attempt 2/3 for external API",
"Deprecated API endpoint called",
])
logs.append(LogEntry(
timestamp=(base_time - timedelta(seconds=i*random.randint(1, 30))).isoformat(),
level=lvl,
service=svc,
message=msg
))
# Filter
if service:
logs = [log for log in logs if log.service == service]
if level:
logs = [log for log in logs if log.level.upper() == level.upper()]
return logs[:limit]
@router.get("/monitoring/metrics", response_model=List[MetricValue])
async def get_metrics():
"""Gibt System-Metriken zurueck (Demo-Daten)."""
import random
return [
MetricValue(name="CPU Usage", value=round(random.uniform(15, 45), 1), unit="%", trend="stable"),
MetricValue(name="Memory Usage", value=round(random.uniform(40, 65), 1), unit="%", trend="up"),
MetricValue(name="Disk Usage", value=round(random.uniform(25, 40), 1), unit="%", trend="stable"),
MetricValue(name="Network In", value=round(random.uniform(1.2, 5.8), 2), unit="MB/s", trend="up"),
MetricValue(name="Network Out", value=round(random.uniform(0.5, 2.1), 2), unit="MB/s", trend="stable"),
MetricValue(name="Active Connections", value=random.randint(12, 48), unit="", trend="up"),
MetricValue(name="Requests/min", value=random.randint(120, 350), unit="req/min", trend="up"),
MetricValue(name="Avg Response Time", value=round(random.uniform(45, 120), 0), unit="ms", trend="down"),
MetricValue(name="Error Rate", value=round(random.uniform(0.1, 0.8), 2), unit="%", trend="stable"),
MetricValue(name="Cache Hit Rate", value=round(random.uniform(85, 98), 1), unit="%", trend="up"),
]
@router.get("/monitoring/containers", response_model=List[ContainerStatus])
async def get_container_status():
"""Gibt Container-Status zurueck (versucht Docker, sonst Demo-Daten)."""
import random
# Versuche echte Docker-Daten
try:
result = subprocess.run(
["docker", "ps", "--format", "{{.Names}}\t{{.Status}}\t{{.State}}"],
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0 and result.stdout.strip():
containers = []
for line in result.stdout.strip().split('\n'):
parts = line.split('\t')
if len(parts) >= 3:
name, status, state = parts[0], parts[1], parts[2]
# Parse uptime from status like "Up 2 hours"
uptime = status if "Up" in status else "N/A"
containers.append(ContainerStatus(
name=name,
status=state,
health="healthy" if state == "running" else "unhealthy",
cpu_percent=round(random.uniform(0.5, 15), 1),
memory_mb=round(random.uniform(50, 500), 0),
uptime=uptime
))
if containers:
return containers
except Exception:
pass
# Fallback: Demo-Daten
return [
ContainerStatus(name="breakpilot-pwa-backend", status="running", health="healthy",
cpu_percent=round(random.uniform(2, 12), 1), memory_mb=round(random.uniform(180, 280), 0), uptime="Up 4 hours"),
ContainerStatus(name="breakpilot-pwa-consent-service", status="running", health="healthy",
cpu_percent=round(random.uniform(1, 8), 1), memory_mb=round(random.uniform(80, 150), 0), uptime="Up 4 hours"),
ContainerStatus(name="breakpilot-pwa-postgres", status="running", health="healthy",
cpu_percent=round(random.uniform(0.5, 5), 1), memory_mb=round(random.uniform(120, 200), 0), uptime="Up 4 hours"),
ContainerStatus(name="breakpilot-pwa-mailpit", status="running", health="healthy",
cpu_percent=round(random.uniform(0.1, 2), 1), memory_mb=round(random.uniform(30, 60), 0), uptime="Up 4 hours"),
]
@router.get("/monitoring/services", response_model=List[ServiceStatus])
async def get_service_status():
"""Prueft den Status aller Services (Health-Checks)."""
import random
services_to_check = [
("Backend API", "http://localhost:8000/api/consent/health"),
("Consent Service", "http://consent-service:8081/health"),
("School Service", "http://school-service:8084/health"),
("Klausur Service", "http://klausur-service:8086/health"),
]
results = []
for name, url in services_to_check:
status = "healthy"
response_time = random.randint(15, 150)
# Versuche echten Health-Check fuer Backend
if "localhost:8000" in url:
try:
import httpx
async with httpx.AsyncClient() as client:
start = datetime.now()
response = await client.get(url, timeout=5)
response_time = int((datetime.now() - start).total_seconds() * 1000)
status = "healthy" if response.status_code == 200 else "unhealthy"
except Exception:
status = "healthy" # Assume healthy if we're running
results.append(ServiceStatus(
name=name,
url=url,
status=status,
response_time_ms=response_time,
last_check=datetime.now().isoformat()
))
return results

View File

@@ -0,0 +1,268 @@
"""
Security Report Parsers & Utility Functions
Parsing logic for security tool reports (Gitleaks, Semgrep, Bandit, Trivy, Grype).
Also contains shared utility functions: tool detection, report lookup, summary calculation.
"""
import json
import subprocess
from datetime import datetime
from pathlib import Path
from typing import List, Optional
from security_models import Finding, SeveritySummary
# Pfade - innerhalb des Backend-Verzeichnisses
# In Docker: /app/security-reports, /app/scripts
# Lokal: backend/security-reports, backend/scripts
BACKEND_DIR = Path(__file__).parent
REPORTS_DIR = BACKEND_DIR / "security-reports"
SCRIPTS_DIR = BACKEND_DIR / "scripts"
# Projekt-Root fuer Security-Scans
PROJECT_ROOT = BACKEND_DIR
# Sicherstellen, dass das Reports-Verzeichnis existiert
try:
REPORTS_DIR.mkdir(exist_ok=True)
except PermissionError:
# Falls keine Schreibrechte, verwende tmp-Verzeichnis
REPORTS_DIR = Path("/tmp/security-reports")
REPORTS_DIR.mkdir(exist_ok=True)
# ===========================
# Utility Functions
# ===========================
def check_tool_installed(tool_name: str) -> tuple[bool, Optional[str]]:
"""Prueft, ob ein Tool installiert ist und gibt die Version zurueck."""
try:
if tool_name == "gitleaks":
result = subprocess.run(["gitleaks", "version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip()
elif tool_name == "semgrep":
result = subprocess.run(["semgrep", "--version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip().split('\n')[0]
elif tool_name == "bandit":
result = subprocess.run(["bandit", "--version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip()
elif tool_name == "trivy":
result = subprocess.run(["trivy", "version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
# Parse "Version: 0.48.x"
for line in result.stdout.split('\n'):
if line.startswith('Version:'):
return True, line.split(':')[1].strip()
return True, result.stdout.strip().split('\n')[0]
elif tool_name == "grype":
result = subprocess.run(["grype", "version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip().split('\n')[0]
elif tool_name == "syft":
result = subprocess.run(["syft", "version"], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
return True, result.stdout.strip().split('\n')[0]
except (subprocess.TimeoutExpired, FileNotFoundError):
pass
return False, None
def get_latest_report(tool_prefix: str) -> Optional[Path]:
"""Findet den neuesten Report fuer ein Tool."""
if not REPORTS_DIR.exists():
return None
reports = list(REPORTS_DIR.glob(f"{tool_prefix}*.json"))
if not reports:
return None
return max(reports, key=lambda p: p.stat().st_mtime)
# ===========================
# Report Parsers
# ===========================
def parse_gitleaks_report(report_path: Path) -> List[Finding]:
"""Parst Gitleaks JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
if isinstance(data, list):
for item in data:
findings.append(Finding(
id=item.get("Fingerprint", "unknown"),
tool="gitleaks",
severity="HIGH", # Secrets sind immer kritisch
title=item.get("Description", "Secret detected"),
message=f"Rule: {item.get('RuleID', 'unknown')}",
file=item.get("File", ""),
line=item.get("StartLine", 0),
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def parse_semgrep_report(report_path: Path) -> List[Finding]:
"""Parst Semgrep JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
results = data.get("results", [])
for item in results:
severity = item.get("extra", {}).get("severity", "INFO").upper()
findings.append(Finding(
id=item.get("check_id", "unknown"),
tool="semgrep",
severity=severity,
title=item.get("extra", {}).get("message", "Finding"),
message=item.get("check_id", ""),
file=item.get("path", ""),
line=item.get("start", {}).get("line", 0),
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def parse_bandit_report(report_path: Path) -> List[Finding]:
"""Parst Bandit JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
results = data.get("results", [])
for item in results:
severity = item.get("issue_severity", "LOW").upper()
findings.append(Finding(
id=item.get("test_id", "unknown"),
tool="bandit",
severity=severity,
title=item.get("issue_text", "Finding"),
message=f"CWE: {item.get('issue_cwe', {}).get('id', 'N/A')}",
file=item.get("filename", ""),
line=item.get("line_number", 0),
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def parse_trivy_report(report_path: Path) -> List[Finding]:
"""Parst Trivy JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
results = data.get("Results", [])
for result in results:
vulnerabilities = result.get("Vulnerabilities", []) or []
target = result.get("Target", "")
for vuln in vulnerabilities:
severity = vuln.get("Severity", "UNKNOWN").upper()
findings.append(Finding(
id=vuln.get("VulnerabilityID", "unknown"),
tool="trivy",
severity=severity,
title=vuln.get("Title", vuln.get("VulnerabilityID", "CVE")),
message=f"{vuln.get('PkgName', '')} {vuln.get('InstalledVersion', '')}",
file=target,
line=None,
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
def parse_grype_report(report_path: Path) -> List[Finding]:
"""Parst Grype JSON Report."""
findings = []
try:
with open(report_path) as f:
data = json.load(f)
matches = data.get("matches", [])
for match in matches:
vuln = match.get("vulnerability", {})
artifact = match.get("artifact", {})
severity = vuln.get("severity", "Unknown").upper()
findings.append(Finding(
id=vuln.get("id", "unknown"),
tool="grype",
severity=severity,
title=vuln.get("description", vuln.get("id", "CVE"))[:100],
message=f"{artifact.get('name', '')} {artifact.get('version', '')}",
file=artifact.get("locations", [{}])[0].get("path", "") if artifact.get("locations") else "",
line=None,
found_at=datetime.fromtimestamp(report_path.stat().st_mtime).isoformat()
))
except (json.JSONDecodeError, KeyError, FileNotFoundError):
pass
return findings
# ===========================
# Aggregation Functions
# ===========================
def get_all_findings() -> List[Finding]:
"""Sammelt alle Findings aus allen Reports."""
findings = []
# Gitleaks
gitleaks_report = get_latest_report("gitleaks")
if gitleaks_report:
findings.extend(parse_gitleaks_report(gitleaks_report))
# Semgrep
semgrep_report = get_latest_report("semgrep")
if semgrep_report:
findings.extend(parse_semgrep_report(semgrep_report))
# Bandit
bandit_report = get_latest_report("bandit")
if bandit_report:
findings.extend(parse_bandit_report(bandit_report))
# Trivy (filesystem)
trivy_fs_report = get_latest_report("trivy-fs")
if trivy_fs_report:
findings.extend(parse_trivy_report(trivy_fs_report))
# Grype
grype_report = get_latest_report("grype")
if grype_report:
findings.extend(parse_grype_report(grype_report))
return findings
def calculate_summary(findings: List[Finding]) -> SeveritySummary:
"""Berechnet die Severity-Zusammenfassung."""
summary = SeveritySummary()
for finding in findings:
severity = finding.severity.upper()
if severity == "CRITICAL":
summary.critical += 1
elif severity == "HIGH":
summary.high += 1
elif severity == "MEDIUM":
summary.medium += 1
elif severity == "LOW":
summary.low += 1
else:
summary.info += 1
summary.total = len(findings)
return summary

View File

@@ -1,83 +1,60 @@
"""
File Processor Service - Dokumentenverarbeitung für BreakPilot.
File Processor Service - Dokumentenverarbeitung fuer BreakPilot.
Shared Service für:
- OCR (Optical Character Recognition) für Handschrift und gedruckten Text
Shared Service fuer:
- OCR (Optical Character Recognition) fuer Handschrift und gedruckten Text
- PDF-Parsing und Textextraktion
- Bildverarbeitung und -optimierung
- DOCX/DOC Textextraktion
Verwendet:
- PaddleOCR für deutsche Handschrift
- PyMuPDF für PDF-Verarbeitung
- python-docx für DOCX-Dateien
- OpenCV für Bildvorverarbeitung
- PaddleOCR fuer deutsche Handschrift (via ImageProcessor)
- PyMuPDF fuer PDF-Verarbeitung
- python-docx fuer DOCX-Dateien
- OpenCV fuer Bildvorverarbeitung (via ImageProcessor)
"""
import logging
import os
import io
import base64
from pathlib import Path
from typing import Optional, List, Dict, Any, Tuple, Union
from dataclasses import dataclass
from enum import Enum
from typing import Optional, List, Dict, Any
import cv2
import numpy as np
from PIL import Image
from .file_processor_types import (
FileType,
ProcessingMode,
ProcessedRegion,
ProcessingResult,
)
from .image_processing import ImageProcessor
logger = logging.getLogger(__name__)
class FileType(str, Enum):
"""Unterstützte Dateitypen."""
PDF = "pdf"
IMAGE = "image"
DOCX = "docx"
DOC = "doc"
TXT = "txt"
UNKNOWN = "unknown"
class ProcessingMode(str, Enum):
"""Verarbeitungsmodi."""
OCR_HANDWRITING = "ocr_handwriting" # Handschrifterkennung
OCR_PRINTED = "ocr_printed" # Gedruckter Text
TEXT_EXTRACT = "text_extract" # Textextraktion (PDF/DOCX)
MIXED = "mixed" # Kombiniert OCR + Textextraktion
@dataclass
class ProcessedRegion:
"""Ein erkannter Textbereich."""
text: str
confidence: float
bbox: Tuple[int, int, int, int] # x1, y1, x2, y2
page: int = 1
@dataclass
class ProcessingResult:
"""Ergebnis der Dokumentenverarbeitung."""
text: str
confidence: float
regions: List[ProcessedRegion]
page_count: int
file_type: FileType
processing_mode: ProcessingMode
metadata: Dict[str, Any]
# Re-export types for backward compatibility
__all__ = [
"FileType",
"ProcessingMode",
"ProcessedRegion",
"ProcessingResult",
"FileProcessor",
"get_file_processor",
"process_file",
"extract_text_from_pdf",
"ocr_image",
"ocr_handwriting",
]
class FileProcessor:
"""
Zentrale Dokumentenverarbeitung für BreakPilot.
Zentrale Dokumentenverarbeitung fuer BreakPilot.
Unterstützt:
- Handschrifterkennung (OCR) für Klausuren
Unterstuetzt:
- Handschrifterkennung (OCR) fuer Klausuren
- Textextraktion aus PDFs
- DOCX/DOC Verarbeitung
- Bildvorverarbeitung für bessere OCR-Ergebnisse
- Bildvorverarbeitung fuer bessere OCR-Ergebnisse
"""
def __init__(self, ocr_lang: str = "de", use_gpu: bool = False):
@@ -85,37 +62,18 @@ class FileProcessor:
Initialisiert den File Processor.
Args:
ocr_lang: Sprache für OCR (default: "de" für Deutsch)
use_gpu: GPU für OCR nutzen (beschleunigt Verarbeitung)
ocr_lang: Sprache fuer OCR (default: "de" fuer Deutsch)
use_gpu: GPU fuer OCR nutzen (beschleunigt Verarbeitung)
"""
self.ocr_lang = ocr_lang
self.use_gpu = use_gpu
self._ocr_engine = None
self._image_processor = ImageProcessor(ocr_lang=ocr_lang, use_gpu=use_gpu)
logger.info(f"FileProcessor initialized (lang={ocr_lang}, gpu={use_gpu})")
@property
def ocr_engine(self):
"""Lazy-Loading des OCR-Engines."""
if self._ocr_engine is None:
self._ocr_engine = self._init_ocr_engine()
return self._ocr_engine
def _init_ocr_engine(self):
"""Initialisiert PaddleOCR oder Fallback."""
try:
from paddleocr import PaddleOCR
return PaddleOCR(
use_angle_cls=True,
lang='german', # Deutsch
use_gpu=self.use_gpu,
show_log=False
)
except ImportError:
logger.warning("PaddleOCR nicht installiert - verwende Fallback")
return None
def detect_file_type(self, file_path: str = None, file_bytes: bytes = None) -> FileType:
def detect_file_type(
self, file_path: str = None, file_bytes: bytes = None
) -> FileType:
"""
Erkennt den Dateityp.
@@ -170,7 +128,9 @@ class FileProcessor:
ProcessingResult mit extrahiertem Text und Metadaten
"""
if not file_path and not file_bytes:
raise ValueError("Entweder file_path oder file_bytes muss angegeben werden")
raise ValueError(
"Entweder file_path oder file_bytes muss angegeben werden"
)
file_type = self.detect_file_type(file_path, file_bytes)
logger.info(f"Processing file of type: {file_type}")
@@ -184,7 +144,7 @@ class FileProcessor:
elif file_type == FileType.TXT:
return self._process_txt(file_path, file_bytes)
else:
raise ValueError(f"Nicht unterstützter Dateityp: {file_type}")
raise ValueError(f"Nicht unterstuetzter Dateityp: {file_type}")
def _process_pdf(
self,
@@ -197,7 +157,6 @@ class FileProcessor:
import fitz # PyMuPDF
except ImportError:
logger.warning("PyMuPDF nicht installiert - versuche Fallback")
# Fallback: PDF als Bild behandeln
return self._process_image(file_path, file_bytes, mode)
if file_bytes:
@@ -211,11 +170,9 @@ class FileProcessor:
region_count = 0
for page_num, page in enumerate(doc, start=1):
# Erst versuchen Text direkt zu extrahieren
page_text = page.get_text()
if page_text.strip() and mode != ProcessingMode.OCR_HANDWRITING:
# PDF enthält Text (nicht nur Bilder)
all_text.append(page_text)
all_regions.append(ProcessedRegion(
text=page_text,
@@ -227,11 +184,11 @@ class FileProcessor:
region_count += 1
else:
# Seite als Bild rendern und OCR anwenden
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) # 2x Auflösung
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
img_bytes = pix.tobytes("png")
img = Image.open(io.BytesIO(img_bytes))
ocr_result = self._ocr_image(img)
ocr_result = self._image_processor.ocr_image(img)
all_text.append(ocr_result["text"])
for region in ocr_result["regions"]:
@@ -242,7 +199,9 @@ class FileProcessor:
doc.close()
avg_confidence = total_confidence / region_count if region_count > 0 else 0.0
avg_confidence = (
total_confidence / region_count if region_count > 0 else 0.0
)
return ProcessingResult(
text="\n\n".join(all_text),
@@ -266,11 +225,8 @@ class FileProcessor:
else:
img = Image.open(file_path)
# Bildvorverarbeitung
processed_img = self._preprocess_image(img)
# OCR
ocr_result = self._ocr_image(processed_img)
processed_img = self._image_processor.preprocess_image(img)
ocr_result = self._image_processor.ocr_image(processed_img)
return ProcessingResult(
text=ocr_result["text"],
@@ -306,7 +262,6 @@ class FileProcessor:
if para.text.strip():
paragraphs.append(para.text)
# Auch Tabellen extrahieren
for table in doc.tables:
for row in table.rows:
row_text = " | ".join(cell.text for cell in row.cells)
@@ -317,12 +272,9 @@ class FileProcessor:
return ProcessingResult(
text=text,
confidence=1.0, # Direkte Textextraktion
confidence=1.0,
regions=[ProcessedRegion(
text=text,
confidence=1.0,
bbox=(0, 0, 0, 0),
page=1
text=text, confidence=1.0, bbox=(0, 0, 0, 0), page=1
)],
page_count=1,
file_type=FileType.DOCX,
@@ -346,10 +298,7 @@ class FileProcessor:
text=text,
confidence=1.0,
regions=[ProcessedRegion(
text=text,
confidence=1.0,
bbox=(0, 0, 0, 0),
page=1
text=text, confidence=1.0, bbox=(0, 0, 0, 0), page=1
)],
page_count=1,
file_type=FileType.TXT,
@@ -357,159 +306,13 @@ class FileProcessor:
metadata={"source": file_path or "bytes"}
)
def _preprocess_image(self, img: Image.Image) -> Image.Image:
"""
Vorverarbeitung des Bildes für bessere OCR-Ergebnisse.
- Konvertierung zu Graustufen
- Kontrastverstärkung
- Rauschunterdrückung
- Binarisierung
"""
# PIL zu OpenCV
cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
# Zu Graustufen konvertieren
gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
# Rauschunterdrückung
denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
# Kontrastverstärkung (CLAHE)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(denoised)
# Adaptive Binarisierung
binary = cv2.adaptiveThreshold(
enhanced,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,
11,
2
)
# Zurück zu PIL
return Image.fromarray(binary)
def _ocr_image(self, img: Image.Image) -> Dict[str, Any]:
"""
Führt OCR auf einem Bild aus.
Returns:
Dict mit text, confidence und regions
"""
if self.ocr_engine is None:
# Fallback wenn kein OCR-Engine verfügbar
return {
"text": "[OCR nicht verfügbar - bitte PaddleOCR installieren]",
"confidence": 0.0,
"regions": []
}
# PIL zu numpy array
img_array = np.array(img)
# Wenn Graustufen, zu RGB konvertieren (PaddleOCR erwartet RGB)
if len(img_array.shape) == 2:
img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
# OCR ausführen
result = self.ocr_engine.ocr(img_array, cls=True)
if not result or not result[0]:
return {"text": "", "confidence": 0.0, "regions": []}
all_text = []
all_regions = []
total_confidence = 0.0
for line in result[0]:
bbox_points = line[0] # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
text, confidence = line[1]
# Bounding Box zu x1, y1, x2, y2 konvertieren
x_coords = [p[0] for p in bbox_points]
y_coords = [p[1] for p in bbox_points]
bbox = (
int(min(x_coords)),
int(min(y_coords)),
int(max(x_coords)),
int(max(y_coords))
)
all_text.append(text)
all_regions.append(ProcessedRegion(
text=text,
confidence=confidence,
bbox=bbox
))
total_confidence += confidence
avg_confidence = total_confidence / len(all_regions) if all_regions else 0.0
return {
"text": "\n".join(all_text),
"confidence": avg_confidence,
"regions": all_regions
}
def extract_handwriting_regions(
self,
img: Image.Image,
min_area: int = 500
) -> List[Dict[str, Any]]:
"""
Erkennt und extrahiert handschriftliche Bereiche aus einem Bild.
Nützlich für Klausuren mit gedruckten Fragen und handschriftlichen Antworten.
Args:
img: Eingabebild
min_area: Minimale Fläche für erkannte Regionen
Returns:
Liste von Regionen mit Koordinaten und erkanntem Text
"""
# Bildvorverarbeitung
cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
# Kanten erkennen
edges = cv2.Canny(gray, 50, 150)
# Morphologische Operationen zum Verbinden
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
dilated = cv2.dilate(edges, kernel, iterations=2)
# Konturen finden
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
regions = []
for contour in contours:
area = cv2.contourArea(contour)
if area < min_area:
continue
x, y, w, h = cv2.boundingRect(contour)
# Region ausschneiden
region_img = img.crop((x, y, x + w, y + h))
# OCR auf Region anwenden
ocr_result = self._ocr_image(region_img)
regions.append({
"bbox": (x, y, x + w, y + h),
"area": area,
"text": ocr_result["text"],
"confidence": ocr_result["confidence"]
})
# Nach Y-Position sortieren (oben nach unten)
regions.sort(key=lambda r: r["bbox"][1])
return regions
"""Delegate to ImageProcessor."""
return self._image_processor.extract_handwriting_regions(img, min_area)
# Singleton-Instanz
@@ -517,7 +320,7 @@ _file_processor: Optional[FileProcessor] = None
def get_file_processor() -> FileProcessor:
"""Gibt Singleton-Instanz des File Processors zurück."""
"""Gibt Singleton-Instanz des File Processors zurueck."""
global _file_processor
if _file_processor is None:
_file_processor = FileProcessor()
@@ -530,34 +333,26 @@ def process_file(
file_bytes: bytes = None,
mode: ProcessingMode = ProcessingMode.MIXED
) -> ProcessingResult:
"""
Convenience function zum Verarbeiten einer Datei.
Args:
file_path: Pfad zur Datei
file_bytes: Dateiinhalt als Bytes
mode: Verarbeitungsmodus
Returns:
ProcessingResult
"""
"""Convenience function zum Verarbeiten einer Datei."""
processor = get_file_processor()
return processor.process(file_path, file_bytes, mode)
def extract_text_from_pdf(file_path: str = None, file_bytes: bytes = None) -> str:
def extract_text_from_pdf(
file_path: str = None, file_bytes: bytes = None
) -> str:
"""Extrahiert Text aus einer PDF-Datei."""
result = process_file(file_path, file_bytes, ProcessingMode.TEXT_EXTRACT)
return result.text
def ocr_image(file_path: str = None, file_bytes: bytes = None) -> str:
"""Führt OCR auf einem Bild aus."""
"""Fuehrt OCR auf einem Bild aus."""
result = process_file(file_path, file_bytes, ProcessingMode.OCR_PRINTED)
return result.text
def ocr_handwriting(file_path: str = None, file_bytes: bytes = None) -> str:
"""Führt Handschrift-OCR auf einem Bild aus."""
"""Fuehrt Handschrift-OCR auf einem Bild aus."""
result = process_file(file_path, file_bytes, ProcessingMode.OCR_HANDWRITING)
return result.text

View File

@@ -0,0 +1,46 @@
"""
Shared types for file processing and image processing modules.
"""
from typing import Optional, List, Dict, Any, Tuple
from dataclasses import dataclass
from enum import Enum
class FileType(str, Enum):
"""Unterstuetzte Dateitypen."""
PDF = "pdf"
IMAGE = "image"
DOCX = "docx"
DOC = "doc"
TXT = "txt"
UNKNOWN = "unknown"
class ProcessingMode(str, Enum):
"""Verarbeitungsmodi."""
OCR_HANDWRITING = "ocr_handwriting" # Handschrifterkennung
OCR_PRINTED = "ocr_printed" # Gedruckter Text
TEXT_EXTRACT = "text_extract" # Textextraktion (PDF/DOCX)
MIXED = "mixed" # Kombiniert OCR + Textextraktion
@dataclass
class ProcessedRegion:
"""Ein erkannter Textbereich."""
text: str
confidence: float
bbox: Tuple[int, int, int, int] # x1, y1, x2, y2
page: int = 1
@dataclass
class ProcessingResult:
"""Ergebnis der Dokumentenverarbeitung."""
text: str
confidence: float
regions: List[ProcessedRegion]
page_count: int
file_type: FileType
processing_mode: ProcessingMode
metadata: Dict[str, Any]

View File

@@ -0,0 +1,213 @@
"""
Image Processing and OCR Service.
Handles:
- Image preprocessing for better OCR results (grayscale, denoising, binarization)
- PaddleOCR integration for text recognition
- Handwriting region extraction from scanned documents
Used by FileProcessor for image and PDF-to-image OCR workflows.
"""
import logging
from typing import Optional, List, Dict, Any, Tuple
import cv2
import numpy as np
from PIL import Image
from .file_processor_types import ProcessedRegion
logger = logging.getLogger(__name__)
class ImageProcessor:
"""
Image preprocessing and OCR for BreakPilot.
Supports:
- PaddleOCR for German handwriting and printed text
- OpenCV-based preprocessing (denoising, CLAHE, adaptive binarization)
- Handwriting region extraction for exam correction
"""
def __init__(self, ocr_lang: str = "de", use_gpu: bool = False):
self.ocr_lang = ocr_lang
self.use_gpu = use_gpu
self._ocr_engine = None
@property
def ocr_engine(self):
"""Lazy-Loading des OCR-Engines."""
if self._ocr_engine is None:
self._ocr_engine = self._init_ocr_engine()
return self._ocr_engine
def _init_ocr_engine(self):
"""Initialisiert PaddleOCR oder Fallback."""
try:
from paddleocr import PaddleOCR
return PaddleOCR(
use_angle_cls=True,
lang='german',
use_gpu=self.use_gpu,
show_log=False
)
except ImportError:
logger.warning("PaddleOCR nicht installiert - verwende Fallback")
return None
def preprocess_image(self, img: Image.Image) -> Image.Image:
"""
Vorverarbeitung des Bildes fuer bessere OCR-Ergebnisse.
- Konvertierung zu Graustufen
- Kontrastverstaerkung
- Rauschunterdrueckung
- Binarisierung
"""
# PIL zu OpenCV
cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
# Zu Graustufen konvertieren
gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
# Rauschunterdrueckung
denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
# Kontrastverstaerkung (CLAHE)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(denoised)
# Adaptive Binarisierung
binary = cv2.adaptiveThreshold(
enhanced,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,
11,
2
)
# Zurueck zu PIL
return Image.fromarray(binary)
def ocr_image(self, img: Image.Image) -> Dict[str, Any]:
"""
Fuehrt OCR auf einem Bild aus.
Returns:
Dict mit text, confidence und regions
"""
if self.ocr_engine is None:
return {
"text": "[OCR nicht verfuegbar - bitte PaddleOCR installieren]",
"confidence": 0.0,
"regions": []
}
# PIL zu numpy array
img_array = np.array(img)
# Wenn Graustufen, zu RGB konvertieren (PaddleOCR erwartet RGB)
if len(img_array.shape) == 2:
img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
# OCR ausfuehren
result = self.ocr_engine.ocr(img_array, cls=True)
if not result or not result[0]:
return {"text": "", "confidence": 0.0, "regions": []}
all_text = []
all_regions = []
total_confidence = 0.0
for line in result[0]:
bbox_points = line[0] # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
text, confidence = line[1]
# Bounding Box zu x1, y1, x2, y2 konvertieren
x_coords = [p[0] for p in bbox_points]
y_coords = [p[1] for p in bbox_points]
bbox = (
int(min(x_coords)),
int(min(y_coords)),
int(max(x_coords)),
int(max(y_coords))
)
all_text.append(text)
all_regions.append(ProcessedRegion(
text=text,
confidence=confidence,
bbox=bbox
))
total_confidence += confidence
avg_confidence = total_confidence / len(all_regions) if all_regions else 0.0
return {
"text": "\n".join(all_text),
"confidence": avg_confidence,
"regions": all_regions
}
def extract_handwriting_regions(
self,
img: Image.Image,
min_area: int = 500
) -> List[Dict[str, Any]]:
"""
Erkennt und extrahiert handschriftliche Bereiche aus einem Bild.
Nuetzlich fuer Klausuren mit gedruckten Fragen und handschriftlichen Antworten.
Args:
img: Eingabebild
min_area: Minimale Flaeche fuer erkannte Regionen
Returns:
Liste von Regionen mit Koordinaten und erkanntem Text
"""
# Bildvorverarbeitung
cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
# Kanten erkennen
edges = cv2.Canny(gray, 50, 150)
# Morphologische Operationen zum Verbinden
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
dilated = cv2.dilate(edges, kernel, iterations=2)
# Konturen finden
contours, _ = cv2.findContours(
dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
regions = []
for contour in contours:
area = cv2.contourArea(contour)
if area < min_area:
continue
x, y, w, h = cv2.boundingRect(contour)
# Region ausschneiden
region_img = img.crop((x, y, x + w, y + h))
# OCR auf Region anwenden
ocr_result = self.ocr_image(region_img)
regions.append({
"bbox": (x, y, x + w, y + h),
"area": area,
"text": ocr_result["text"],
"confidence": ocr_result["confidence"]
})
# Nach Y-Position sortieren (oben nach unten)
regions.sort(key=lambda r: r["bbox"][1])
return regions

View File

@@ -0,0 +1,85 @@
"""
PDF Models - Dataclasses fuer PDF-Generierung.
Enthaelt alle Datenmodelle die von PDFService und den Convenience-Funktionen
in pdf_service.py verwendet werden.
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
@dataclass
class SchoolInfo:
"""Schulinformationen fuer Header."""
name: str
address: str
phone: str
email: str
logo_path: Optional[str] = None
website: Optional[str] = None
principal: Optional[str] = None
@dataclass
class LetterData:
"""Daten fuer Elternbrief-PDF."""
recipient_name: str
recipient_address: str
student_name: str
student_class: str
subject: str
content: str
date: str
teacher_name: str
teacher_title: Optional[str] = None
school_info: Optional[SchoolInfo] = None
letter_type: str = "general" # general, halbjahr, fehlzeiten, elternabend, lob
tone: str = "professional"
legal_references: Optional[List[Dict[str, str]]] = None
gfk_principles_applied: Optional[List[str]] = None
@dataclass
class CertificateData:
"""Daten fuer Zeugnis-PDF."""
student_name: str
student_birthdate: str
student_class: str
school_year: str
certificate_type: str # halbjahr, jahres, abschluss
subjects: List[Dict[str, Any]] # [{name, grade, note}]
attendance: Dict[str, int] # {days_absent, days_excused, days_unexcused}
remarks: Optional[str] = None
class_teacher: str = ""
principal: str = ""
school_info: Optional[SchoolInfo] = None
issue_date: str = ""
social_behavior: Optional[str] = None # A, B, C, D
work_behavior: Optional[str] = None # A, B, C, D
@dataclass
class StudentInfo:
"""Schuelerinformationen fuer Korrektur-PDFs."""
student_id: str
name: str
class_name: str
@dataclass
class CorrectionData:
"""Daten fuer Korrektur-Uebersicht PDF."""
student: StudentInfo
exam_title: str
subject: str
date: str
max_points: int
achieved_points: int
grade: str
percentage: float
corrections: List[Dict[str, Any]] # [{question, answer, points, feedback}]
teacher_notes: str = ""
ai_feedback: str = ""
grade_distribution: Optional[Dict[str, int]] = None # {note: anzahl}
class_average: Optional[float] = None

View File

@@ -1,115 +1,55 @@
"""
PDF Service - Zentrale PDF-Generierung für BreakPilot.
PDF Service - Zentrale PDF-Generierung fuer BreakPilot.
Shared Service für:
Shared Service fuer:
- Letters (Elternbriefe)
- Zeugnisse (Schulzeugnisse)
- Correction (Korrektur-Übersichten)
- Correction (Korrektur-Uebersichten)
Verwendet WeasyPrint für PDF-Rendering und Jinja2 für Templates.
Verwendet WeasyPrint fuer PDF-Rendering und Jinja2 fuer Templates.
Datenmodelle: services/pdf_models.py
HTML-Templates: services/pdf_templates.py
"""
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional, List
from dataclasses import dataclass
from typing import Any, Dict, Optional
from jinja2 import Environment, FileSystemLoader, select_autoescape
from weasyprint import HTML, CSS
from weasyprint.text.fonts import FontConfiguration
# Re-export models for backward compatibility
from .pdf_models import (
SchoolInfo,
LetterData,
CertificateData,
StudentInfo,
CorrectionData,
)
from .pdf_templates import (
get_base_css,
get_letter_template_html,
get_certificate_template_html,
get_correction_template_html,
)
logger = logging.getLogger(__name__)
# Template directory
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pdf"
@dataclass
class SchoolInfo:
"""Schulinformationen für Header."""
name: str
address: str
phone: str
email: str
logo_path: Optional[str] = None
website: Optional[str] = None
principal: Optional[str] = None
@dataclass
class LetterData:
"""Daten für Elternbrief-PDF."""
recipient_name: str
recipient_address: str
student_name: str
student_class: str
subject: str
content: str
date: str
teacher_name: str
teacher_title: Optional[str] = None
school_info: Optional[SchoolInfo] = None
letter_type: str = "general" # general, halbjahr, fehlzeiten, elternabend, lob
tone: str = "professional"
legal_references: Optional[List[Dict[str, str]]] = None
gfk_principles_applied: Optional[List[str]] = None
@dataclass
class CertificateData:
"""Daten für Zeugnis-PDF."""
student_name: str
student_birthdate: str
student_class: str
school_year: str
certificate_type: str # halbjahr, jahres, abschluss
subjects: List[Dict[str, Any]] # [{name, grade, note}]
attendance: Dict[str, int] # {days_absent, days_excused, days_unexcused}
remarks: Optional[str] = None
class_teacher: str = ""
principal: str = ""
school_info: Optional[SchoolInfo] = None
issue_date: str = ""
social_behavior: Optional[str] = None # A, B, C, D
work_behavior: Optional[str] = None # A, B, C, D
@dataclass
class StudentInfo:
"""Schülerinformationen für Korrektur-PDFs."""
student_id: str
name: str
class_name: str
@dataclass
class CorrectionData:
"""Daten für Korrektur-Übersicht PDF."""
student: StudentInfo
exam_title: str
subject: str
date: str
max_points: int
achieved_points: int
grade: str
percentage: float
corrections: List[Dict[str, Any]] # [{question, answer, points, feedback}]
teacher_notes: str = ""
ai_feedback: str = ""
grade_distribution: Optional[Dict[str, int]] = None # {note: anzahl}
class_average: Optional[float] = None
class PDFService:
"""
Zentrale PDF-Generierung für BreakPilot.
Zentrale PDF-Generierung fuer BreakPilot.
Unterstützt:
Unterstuetzt:
- Elternbriefe mit GFK-Prinzipien und rechtlichen Referenzen
- Schulzeugnisse (Halbjahr, Jahres, Abschluss)
- Korrektur-Übersichten für Klausuren
- Korrektur-Uebersichten fuer Klausuren
"""
def __init__(self, templates_dir: Optional[Path] = None):
@@ -143,7 +83,7 @@ class PDFService:
@staticmethod
def _date_format(value: str, format_str: str = "%d.%m.%Y") -> str:
"""Formatiert Datum für deutsche Darstellung."""
"""Formatiert Datum fuer deutsche Darstellung."""
if not value:
return ""
try:
@@ -154,10 +94,10 @@ class PDFService:
@staticmethod
def _grade_color(grade: str) -> str:
"""Gibt Farbe basierend auf Note zurück."""
"""Gibt Farbe basierend auf Note zurueck."""
grade_colors = {
"1": "#27ae60", # Grün
"2": "#2ecc71", # Hellgrün
"1": "#27ae60", # Gruen
"2": "#2ecc71", # Hellgruen
"3": "#f1c40f", # Gelb
"4": "#e67e22", # Orange
"5": "#e74c3c", # Rot
@@ -170,227 +110,12 @@ class PDFService:
return grade_colors.get(str(grade), "#333333")
def _get_base_css(self) -> str:
"""Gibt Basis-CSS für alle PDFs zurück."""
return """
@page {
size: A4;
margin: 2cm 2.5cm;
@top-right {
content: counter(page) " / " counter(pages);
font-size: 9pt;
color: #666;
}
}
body {
font-family: 'DejaVu Sans', 'Liberation Sans', Arial, sans-serif;
font-size: 11pt;
line-height: 1.5;
color: #333;
}
h1, h2, h3 {
font-weight: bold;
margin-top: 1em;
margin-bottom: 0.5em;
}
h1 { font-size: 16pt; }
h2 { font-size: 14pt; }
h3 { font-size: 12pt; }
.header {
border-bottom: 2px solid #2c3e50;
padding-bottom: 15px;
margin-bottom: 20px;
}
.school-name {
font-size: 18pt;
font-weight: bold;
color: #2c3e50;
}
.school-info {
font-size: 9pt;
color: #666;
}
.letter-date {
text-align: right;
margin-bottom: 20px;
}
.recipient {
margin-bottom: 30px;
}
.subject {
font-weight: bold;
margin-bottom: 20px;
}
.content {
text-align: justify;
margin-bottom: 30px;
}
.signature {
margin-top: 40px;
}
.legal-references {
font-size: 9pt;
color: #666;
border-top: 1px solid #ddd;
margin-top: 30px;
padding-top: 10px;
}
.gfk-badge {
display: inline-block;
background: #e8f5e9;
color: #27ae60;
font-size: 8pt;
padding: 2px 8px;
border-radius: 10px;
margin-right: 5px;
}
/* Zeugnis-Styles */
.certificate-header {
text-align: center;
margin-bottom: 30px;
}
.certificate-title {
font-size: 20pt;
font-weight: bold;
margin-bottom: 10px;
}
.student-info {
margin-bottom: 20px;
padding: 15px;
background: #f9f9f9;
border-radius: 5px;
}
.grades-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
.grades-table th,
.grades-table td {
border: 1px solid #ddd;
padding: 8px 12px;
text-align: left;
}
.grades-table th {
background: #2c3e50;
color: white;
}
.grades-table tr:nth-child(even) {
background: #f9f9f9;
}
.grade-cell {
text-align: center;
font-weight: bold;
font-size: 12pt;
}
.attendance-box {
background: #fff3cd;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}
.signatures-row {
display: flex;
justify-content: space-between;
margin-top: 50px;
}
.signature-block {
text-align: center;
width: 40%;
}
.signature-line {
border-top: 1px solid #333;
margin-top: 40px;
padding-top: 5px;
}
/* Korrektur-Styles */
.exam-header {
background: #2c3e50;
color: white;
padding: 15px;
margin-bottom: 20px;
}
.result-box {
background: #e8f5e9;
padding: 20px;
text-align: center;
margin-bottom: 20px;
border-radius: 5px;
}
.result-grade {
font-size: 36pt;
font-weight: bold;
}
.result-points {
font-size: 14pt;
color: #666;
}
.corrections-list {
margin-bottom: 20px;
}
.correction-item {
border: 1px solid #ddd;
padding: 15px;
margin-bottom: 10px;
border-radius: 5px;
}
.correction-question {
font-weight: bold;
margin-bottom: 5px;
}
.correction-feedback {
background: #fff8e1;
padding: 10px;
margin-top: 10px;
border-left: 3px solid #ffc107;
font-size: 10pt;
}
.stats-table {
width: 100%;
margin-top: 20px;
}
.stats-table td {
padding: 5px 10px;
}
"""
"""Gibt Basis-CSS fuer alle PDFs zurueck (delegiert an pdf_templates)."""
return get_base_css()
def generate_letter_pdf(self, data: LetterData) -> bytes:
"""
Generiert PDF für Elternbrief.
Generiert PDF fuer Elternbrief.
Args:
data: LetterData mit allen Briefinformationen
@@ -417,7 +142,7 @@ class PDFService:
def generate_certificate_pdf(self, data: CertificateData) -> bytes:
"""
Generiert PDF für Schulzeugnis.
Generiert PDF fuer Schulzeugnis.
Args:
data: CertificateData mit allen Zeugnisinformationen
@@ -444,7 +169,7 @@ class PDFService:
def generate_correction_pdf(self, data: CorrectionData) -> bytes:
"""
Generiert PDF für Korrektur-Übersicht.
Generiert PDF fuer Korrektur-Uebersicht.
Args:
data: CorrectionData mit allen Korrekturinformationen
@@ -470,322 +195,29 @@ class PDFService:
return pdf_bytes
def _get_letter_template(self):
"""Gibt Letter-Template zurück (inline falls Datei nicht existiert)."""
"""Gibt Letter-Template zurueck (inline falls Datei nicht existiert)."""
template_path = self.templates_dir / "letter.html"
if template_path.exists():
return self.jinja_env.get_template("letter.html")
# Inline-Template als Fallback
return self.jinja_env.from_string(self._get_letter_template_html())
return self.jinja_env.from_string(get_letter_template_html())
def _get_certificate_template(self):
"""Gibt Certificate-Template zurück."""
"""Gibt Certificate-Template zurueck."""
template_path = self.templates_dir / "certificate.html"
if template_path.exists():
return self.jinja_env.get_template("certificate.html")
return self.jinja_env.from_string(self._get_certificate_template_html())
return self.jinja_env.from_string(get_certificate_template_html())
def _get_correction_template(self):
"""Gibt Correction-Template zurück."""
"""Gibt Correction-Template zurueck."""
template_path = self.templates_dir / "correction.html"
if template_path.exists():
return self.jinja_env.get_template("correction.html")
return self.jinja_env.from_string(self._get_correction_template_html())
@staticmethod
def _get_letter_template_html() -> str:
"""Inline HTML-Template für Elternbriefe."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{{ data.subject }}</title>
</head>
<body>
<div class="header">
{% if data.school_info %}
<div class="school-name">{{ data.school_info.name }}</div>
<div class="school-info">
{{ data.school_info.address }}<br>
Tel: {{ data.school_info.phone }} | E-Mail: {{ data.school_info.email }}
{% if data.school_info.website %} | {{ data.school_info.website }}{% endif %}
</div>
{% else %}
<div class="school-name">Schule</div>
{% endif %}
</div>
<div class="letter-date">
{{ data.date }}
</div>
<div class="recipient">
{{ data.recipient_name }}<br>
{{ data.recipient_address | replace('\\n', '<br>') | safe }}
</div>
<div class="subject">
Betreff: {{ data.subject }}
</div>
<div class="meta-info" style="font-size: 10pt; color: #666; margin-bottom: 20px;">
Schüler/in: {{ data.student_name }} | Klasse: {{ data.student_class }}
</div>
<div class="content">
{{ data.content | replace('\\n', '<br>') | safe }}
</div>
{% if data.gfk_principles_applied %}
<div style="margin-bottom: 20px;">
{% for principle in data.gfk_principles_applied %}
<span class="gfk-badge">✓ {{ principle }}</span>
{% endfor %}
</div>
{% endif %}
<div class="signature">
<p>Mit freundlichen Grüßen</p>
<p style="margin-top: 30px;">
{{ data.teacher_name }}
{% if data.teacher_title %}<br><span style="font-size: 10pt;">{{ data.teacher_title }}</span>{% endif %}
</p>
</div>
{% if data.legal_references %}
<div class="legal-references">
<strong>Rechtliche Grundlagen:</strong><br>
{% for ref in data.legal_references %}
{{ ref.law }} {{ ref.paragraph }}: {{ ref.title }}<br>
{% endfor %}
</div>
{% endif %}
<div style="font-size: 8pt; color: #999; margin-top: 30px; text-align: center;">
Erstellt mit BreakPilot | {{ generated_at }}
</div>
</body>
</html>
"""
@staticmethod
def _get_certificate_template_html() -> str:
"""Inline HTML-Template für Zeugnisse."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Zeugnis - {{ data.student_name }}</title>
</head>
<body>
<div class="certificate-header">
{% if data.school_info %}
<div class="school-name" style="font-size: 14pt;">{{ data.school_info.name }}</div>
{% endif %}
<div class="certificate-title">
{% if data.certificate_type == 'halbjahr' %}
Halbjahreszeugnis
{% elif data.certificate_type == 'jahres' %}
Jahreszeugnis
{% else %}
Abschlusszeugnis
{% endif %}
</div>
<div>Schuljahr {{ data.school_year }}</div>
</div>
<div class="student-info">
<table style="width: 100%;">
<tr>
<td><strong>Name:</strong> {{ data.student_name }}</td>
<td><strong>Geburtsdatum:</strong> {{ data.student_birthdate }}</td>
</tr>
<tr>
<td><strong>Klasse:</strong> {{ data.student_class }}</td>
<td>&nbsp;</td>
</tr>
</table>
</div>
<h3>Leistungen</h3>
<table class="grades-table">
<thead>
<tr>
<th style="width: 70%;">Fach</th>
<th style="width: 15%;">Note</th>
<th style="width: 15%;">Punkte</th>
</tr>
</thead>
<tbody>
{% for subject in data.subjects %}
<tr>
<td>{{ subject.name }}</td>
<td class="grade-cell" style="color: {{ subject.grade | grade_color }};">
{{ subject.grade }}
</td>
<td class="grade-cell">{{ subject.points | default('-') }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% if data.social_behavior or data.work_behavior %}
<h3>Verhalten</h3>
<table class="grades-table" style="width: 50%;">
{% if data.social_behavior %}
<tr>
<td>Sozialverhalten</td>
<td class="grade-cell">{{ data.social_behavior }}</td>
</tr>
{% endif %}
{% if data.work_behavior %}
<tr>
<td>Arbeitsverhalten</td>
<td class="grade-cell">{{ data.work_behavior }}</td>
</tr>
{% endif %}
</table>
{% endif %}
<div class="attendance-box">
<strong>Versäumte Tage:</strong> {{ data.attendance.days_absent | default(0) }}
(davon entschuldigt: {{ data.attendance.days_excused | default(0) }},
unentschuldigt: {{ data.attendance.days_unexcused | default(0) }})
</div>
{% if data.remarks %}
<div style="margin-bottom: 20px;">
<strong>Bemerkungen:</strong><br>
{{ data.remarks }}
</div>
{% endif %}
<div style="margin-top: 30px;">
<strong>Ausgestellt am:</strong> {{ data.issue_date }}
</div>
<div class="signatures-row">
<div class="signature-block">
<div class="signature-line">{{ data.class_teacher }}</div>
<div style="font-size: 9pt;">Klassenlehrer/in</div>
</div>
<div class="signature-block">
<div class="signature-line">{{ data.principal }}</div>
<div style="font-size: 9pt;">Schulleiter/in</div>
</div>
</div>
<div style="text-align: center; margin-top: 40px;">
<div style="font-size: 9pt; color: #666;">Siegel der Schule</div>
</div>
</body>
</html>
"""
@staticmethod
def _get_correction_template_html() -> str:
"""Inline HTML-Template für Korrektur-Übersichten."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Korrektur - {{ data.exam_title }}</title>
</head>
<body>
<div class="exam-header">
<h1 style="margin: 0; color: white;">{{ data.exam_title }}</h1>
<div>{{ data.subject }} | {{ data.date }}</div>
</div>
<div class="student-info">
<strong>{{ data.student.name }}</strong> | Klasse {{ data.student.class_name }}
</div>
<div class="result-box">
<div class="result-grade" style="color: {{ data.grade | grade_color }};">
Note: {{ data.grade }}
</div>
<div class="result-points">
{{ data.achieved_points }} von {{ data.max_points }} Punkten
({{ data.percentage | round(1) }}%)
</div>
</div>
<h3>Detaillierte Auswertung</h3>
<div class="corrections-list">
{% for item in data.corrections %}
<div class="correction-item">
<div class="correction-question">
{{ item.question }}
</div>
{% if item.answer %}
<div style="margin: 5px 0; font-style: italic; color: #555;">
<strong>Antwort:</strong> {{ item.answer }}
</div>
{% endif %}
<div>
<strong>Punkte:</strong> {{ item.points }}
</div>
{% if item.feedback %}
<div class="correction-feedback">
{{ item.feedback }}
</div>
{% endif %}
</div>
{% endfor %}
</div>
{% if data.teacher_notes %}
<div style="background: #e3f2fd; padding: 15px; border-radius: 5px; margin-bottom: 20px;">
<strong>Lehrerkommentar:</strong><br>
{{ data.teacher_notes }}
</div>
{% endif %}
{% if data.ai_feedback %}
<div style="background: #f3e5f5; padding: 15px; border-radius: 5px; margin-bottom: 20px;">
<strong>KI-Feedback:</strong><br>
{{ data.ai_feedback }}
</div>
{% endif %}
{% if data.class_average or data.grade_distribution %}
<h3>Klassenstatistik</h3>
<table class="stats-table">
{% if data.class_average %}
<tr>
<td><strong>Klassendurchschnitt:</strong></td>
<td>{{ data.class_average }}</td>
</tr>
{% endif %}
{% if data.grade_distribution %}
<tr>
<td><strong>Notenverteilung:</strong></td>
<td>
{% for grade, count in data.grade_distribution.items() %}
Note {{ grade }}: {{ count }}x{% if not loop.last %}, {% endif %}
{% endfor %}
</td>
</tr>
{% endif %}
</table>
{% endif %}
<div class="signature" style="margin-top: 40px;">
<p style="font-size: 9pt; color: #666;">Datum: {{ data.date }}</p>
</div>
<div style="font-size: 8pt; color: #999; margin-top: 30px; text-align: center;">
Erstellt mit BreakPilot | {{ generated_at }}
</div>
</body>
</html>
"""
return self.jinja_env.from_string(get_correction_template_html())
# Convenience functions for direct usage
@@ -793,7 +225,7 @@ _pdf_service: Optional[PDFService] = None
def get_pdf_service() -> PDFService:
"""Gibt Singleton-Instanz des PDF-Service zurück."""
"""Gibt Singleton-Instanz des PDF-Service zurueck."""
global _pdf_service
if _pdf_service is None:
_pdf_service = PDFService()

View File

@@ -0,0 +1,519 @@
"""
PDF Templates - Inline HTML-Templates und CSS fuer PDF-Generierung.
Fallback-Templates die verwendet werden wenn keine externen HTML-Dateien
im templates/pdf/ Verzeichnis vorhanden sind.
"""
def get_base_css() -> str:
"""Basis-CSS fuer alle PDFs (A4, Typografie, Komponenten-Styles)."""
return """
@page {
size: A4;
margin: 2cm 2.5cm;
@top-right {
content: counter(page) " / " counter(pages);
font-size: 9pt;
color: #666;
}
}
body {
font-family: 'DejaVu Sans', 'Liberation Sans', Arial, sans-serif;
font-size: 11pt;
line-height: 1.5;
color: #333;
}
h1, h2, h3 {
font-weight: bold;
margin-top: 1em;
margin-bottom: 0.5em;
}
h1 { font-size: 16pt; }
h2 { font-size: 14pt; }
h3 { font-size: 12pt; }
.header {
border-bottom: 2px solid #2c3e50;
padding-bottom: 15px;
margin-bottom: 20px;
}
.school-name {
font-size: 18pt;
font-weight: bold;
color: #2c3e50;
}
.school-info {
font-size: 9pt;
color: #666;
}
.letter-date {
text-align: right;
margin-bottom: 20px;
}
.recipient {
margin-bottom: 30px;
}
.subject {
font-weight: bold;
margin-bottom: 20px;
}
.content {
text-align: justify;
margin-bottom: 30px;
}
.signature {
margin-top: 40px;
}
.legal-references {
font-size: 9pt;
color: #666;
border-top: 1px solid #ddd;
margin-top: 30px;
padding-top: 10px;
}
.gfk-badge {
display: inline-block;
background: #e8f5e9;
color: #27ae60;
font-size: 8pt;
padding: 2px 8px;
border-radius: 10px;
margin-right: 5px;
}
/* Zeugnis-Styles */
.certificate-header {
text-align: center;
margin-bottom: 30px;
}
.certificate-title {
font-size: 20pt;
font-weight: bold;
margin-bottom: 10px;
}
.student-info {
margin-bottom: 20px;
padding: 15px;
background: #f9f9f9;
border-radius: 5px;
}
.grades-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
.grades-table th,
.grades-table td {
border: 1px solid #ddd;
padding: 8px 12px;
text-align: left;
}
.grades-table th {
background: #2c3e50;
color: white;
}
.grades-table tr:nth-child(even) {
background: #f9f9f9;
}
.grade-cell {
text-align: center;
font-weight: bold;
font-size: 12pt;
}
.attendance-box {
background: #fff3cd;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}
.signatures-row {
display: flex;
justify-content: space-between;
margin-top: 50px;
}
.signature-block {
text-align: center;
width: 40%;
}
.signature-line {
border-top: 1px solid #333;
margin-top: 40px;
padding-top: 5px;
}
/* Korrektur-Styles */
.exam-header {
background: #2c3e50;
color: white;
padding: 15px;
margin-bottom: 20px;
}
.result-box {
background: #e8f5e9;
padding: 20px;
text-align: center;
margin-bottom: 20px;
border-radius: 5px;
}
.result-grade {
font-size: 36pt;
font-weight: bold;
}
.result-points {
font-size: 14pt;
color: #666;
}
.corrections-list {
margin-bottom: 20px;
}
.correction-item {
border: 1px solid #ddd;
padding: 15px;
margin-bottom: 10px;
border-radius: 5px;
}
.correction-question {
font-weight: bold;
margin-bottom: 5px;
}
.correction-feedback {
background: #fff8e1;
padding: 10px;
margin-top: 10px;
border-left: 3px solid #ffc107;
font-size: 10pt;
}
.stats-table {
width: 100%;
margin-top: 20px;
}
.stats-table td {
padding: 5px 10px;
}
"""
def get_letter_template_html() -> str:
"""Inline HTML-Template fuer Elternbriefe."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{{ data.subject }}</title>
</head>
<body>
<div class="header">
{% if data.school_info %}
<div class="school-name">{{ data.school_info.name }}</div>
<div class="school-info">
{{ data.school_info.address }}<br>
Tel: {{ data.school_info.phone }} | E-Mail: {{ data.school_info.email }}
{% if data.school_info.website %} | {{ data.school_info.website }}{% endif %}
</div>
{% else %}
<div class="school-name">Schule</div>
{% endif %}
</div>
<div class="letter-date">
{{ data.date }}
</div>
<div class="recipient">
{{ data.recipient_name }}<br>
{{ data.recipient_address | replace('\\n', '<br>') | safe }}
</div>
<div class="subject">
Betreff: {{ data.subject }}
</div>
<div class="meta-info" style="font-size: 10pt; color: #666; margin-bottom: 20px;">
Schüler/in: {{ data.student_name }} | Klasse: {{ data.student_class }}
</div>
<div class="content">
{{ data.content | replace('\\n', '<br>') | safe }}
</div>
{% if data.gfk_principles_applied %}
<div style="margin-bottom: 20px;">
{% for principle in data.gfk_principles_applied %}
<span class="gfk-badge">✓ {{ principle }}</span>
{% endfor %}
</div>
{% endif %}
<div class="signature">
<p>Mit freundlichen Grüßen</p>
<p style="margin-top: 30px;">
{{ data.teacher_name }}
{% if data.teacher_title %}<br><span style="font-size: 10pt;">{{ data.teacher_title }}</span>{% endif %}
</p>
</div>
{% if data.legal_references %}
<div class="legal-references">
<strong>Rechtliche Grundlagen:</strong><br>
{% for ref in data.legal_references %}
{{ ref.law }} {{ ref.paragraph }}: {{ ref.title }}<br>
{% endfor %}
</div>
{% endif %}
<div style="font-size: 8pt; color: #999; margin-top: 30px; text-align: center;">
Erstellt mit BreakPilot | {{ generated_at }}
</div>
</body>
</html>
"""
def get_certificate_template_html() -> str:
"""Inline HTML-Template fuer Zeugnisse."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Zeugnis - {{ data.student_name }}</title>
</head>
<body>
<div class="certificate-header">
{% if data.school_info %}
<div class="school-name" style="font-size: 14pt;">{{ data.school_info.name }}</div>
{% endif %}
<div class="certificate-title">
{% if data.certificate_type == 'halbjahr' %}
Halbjahreszeugnis
{% elif data.certificate_type == 'jahres' %}
Jahreszeugnis
{% else %}
Abschlusszeugnis
{% endif %}
</div>
<div>Schuljahr {{ data.school_year }}</div>
</div>
<div class="student-info">
<table style="width: 100%;">
<tr>
<td><strong>Name:</strong> {{ data.student_name }}</td>
<td><strong>Geburtsdatum:</strong> {{ data.student_birthdate }}</td>
</tr>
<tr>
<td><strong>Klasse:</strong> {{ data.student_class }}</td>
<td>&nbsp;</td>
</tr>
</table>
</div>
<h3>Leistungen</h3>
<table class="grades-table">
<thead>
<tr>
<th style="width: 70%;">Fach</th>
<th style="width: 15%;">Note</th>
<th style="width: 15%;">Punkte</th>
</tr>
</thead>
<tbody>
{% for subject in data.subjects %}
<tr>
<td>{{ subject.name }}</td>
<td class="grade-cell" style="color: {{ subject.grade | grade_color }};">
{{ subject.grade }}
</td>
<td class="grade-cell">{{ subject.points | default('-') }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% if data.social_behavior or data.work_behavior %}
<h3>Verhalten</h3>
<table class="grades-table" style="width: 50%;">
{% if data.social_behavior %}
<tr>
<td>Sozialverhalten</td>
<td class="grade-cell">{{ data.social_behavior }}</td>
</tr>
{% endif %}
{% if data.work_behavior %}
<tr>
<td>Arbeitsverhalten</td>
<td class="grade-cell">{{ data.work_behavior }}</td>
</tr>
{% endif %}
</table>
{% endif %}
<div class="attendance-box">
<strong>Versäumte Tage:</strong> {{ data.attendance.days_absent | default(0) }}
(davon entschuldigt: {{ data.attendance.days_excused | default(0) }},
unentschuldigt: {{ data.attendance.days_unexcused | default(0) }})
</div>
{% if data.remarks %}
<div style="margin-bottom: 20px;">
<strong>Bemerkungen:</strong><br>
{{ data.remarks }}
</div>
{% endif %}
<div style="margin-top: 30px;">
<strong>Ausgestellt am:</strong> {{ data.issue_date }}
</div>
<div class="signatures-row">
<div class="signature-block">
<div class="signature-line">{{ data.class_teacher }}</div>
<div style="font-size: 9pt;">Klassenlehrer/in</div>
</div>
<div class="signature-block">
<div class="signature-line">{{ data.principal }}</div>
<div style="font-size: 9pt;">Schulleiter/in</div>
</div>
</div>
<div style="text-align: center; margin-top: 40px;">
<div style="font-size: 9pt; color: #666;">Siegel der Schule</div>
</div>
</body>
</html>
"""
def get_correction_template_html() -> str:
"""Inline HTML-Template fuer Korrektur-Uebersichten."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Korrektur - {{ data.exam_title }}</title>
</head>
<body>
<div class="exam-header">
<h1 style="margin: 0; color: white;">{{ data.exam_title }}</h1>
<div>{{ data.subject }} | {{ data.date }}</div>
</div>
<div class="student-info">
<strong>{{ data.student.name }}</strong> | Klasse {{ data.student.class_name }}
</div>
<div class="result-box">
<div class="result-grade" style="color: {{ data.grade | grade_color }};">
Note: {{ data.grade }}
</div>
<div class="result-points">
{{ data.achieved_points }} von {{ data.max_points }} Punkten
({{ data.percentage | round(1) }}%)
</div>
</div>
<h3>Detaillierte Auswertung</h3>
<div class="corrections-list">
{% for item in data.corrections %}
<div class="correction-item">
<div class="correction-question">
{{ item.question }}
</div>
{% if item.answer %}
<div style="margin: 5px 0; font-style: italic; color: #555;">
<strong>Antwort:</strong> {{ item.answer }}
</div>
{% endif %}
<div>
<strong>Punkte:</strong> {{ item.points }}
</div>
{% if item.feedback %}
<div class="correction-feedback">
{{ item.feedback }}
</div>
{% endif %}
</div>
{% endfor %}
</div>
{% if data.teacher_notes %}
<div style="background: #e3f2fd; padding: 15px; border-radius: 5px; margin-bottom: 20px;">
<strong>Lehrerkommentar:</strong><br>
{{ data.teacher_notes }}
</div>
{% endif %}
{% if data.ai_feedback %}
<div style="background: #f3e5f5; padding: 15px; border-radius: 5px; margin-bottom: 20px;">
<strong>KI-Feedback:</strong><br>
{{ data.ai_feedback }}
</div>
{% endif %}
{% if data.class_average or data.grade_distribution %}
<h3>Klassenstatistik</h3>
<table class="stats-table">
{% if data.class_average %}
<tr>
<td><strong>Klassendurchschnitt:</strong></td>
<td>{{ data.class_average }}</td>
</tr>
{% endif %}
{% if data.grade_distribution %}
<tr>
<td><strong>Notenverteilung:</strong></td>
<td>
{% for grade, count in data.grade_distribution.items() %}
Note {{ grade }}: {{ count }}x{% if not loop.last %}, {% endif %}
{% endfor %}
</td>
</tr>
{% endif %}
</table>
{% endif %}
<div class="signature" style="margin-top: 40px;">
<p style="font-size: 9pt; color: #666;">Datum: {{ data.date }}</p>
</div>
<div style="font-size: 8pt; color: #999; margin-top: 30px; text-align: center;">
Erstellt mit BreakPilot | {{ generated_at }}
</div>
</body>
</html>
"""