Initial commit: breakpilot-core - Shared Infrastructure

Docker Compose with 24+ services:
- PostgreSQL (PostGIS), Valkey, MinIO, Qdrant
- Vault (PKI/TLS), Nginx (Reverse Proxy)
- Backend Core API, Consent Service, Billing Service
- RAG Service, Embedding Service
- Gitea, Woodpecker CI/CD
- Night Scheduler, Health Aggregator
- Jitsi (Web/XMPP/JVB/Jicofo), Mailpit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Boenisch
2026-02-11 23:47:13 +01:00
commit ad111d5e69
244 changed files with 84288 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
"""Alert Agent Models."""
from .alert_item import AlertItem, AlertSource, AlertStatus
from .relevance_profile import RelevanceProfile, PriorityItem
__all__ = [
"AlertItem",
"AlertSource",
"AlertStatus",
"RelevanceProfile",
"PriorityItem",
]

View File

@@ -0,0 +1,174 @@
"""
AlertItem Model.
Repräsentiert einen einzelnen Alert aus Google Alerts (RSS oder Email).
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional
import hashlib
import uuid
class AlertSource(str, Enum):
    """Origin of an alert (where the item was ingested from)."""
    GOOGLE_ALERTS_RSS = "google_alerts_rss"      # Google Alerts RSS feed
    GOOGLE_ALERTS_EMAIL = "google_alerts_email"  # Google Alerts e-mail
    MANUAL = "manual"                            # added manually
class AlertStatus(str, Enum):
    """Processing status of an alert within the pipeline."""
    NEW = "new"                # just fetched, not yet processed
    PROCESSED = "processed"
    DUPLICATE = "duplicate"    # recognised as a duplicate during dedup
    SCORED = "scored"          # relevance score has been assigned
    REVIEWED = "reviewed"
    ARCHIVED = "archived"
@dataclass
class AlertItem:
    """A single alert entry from Google Alerts (RSS or email).

    Deduplication fields (``url_hash``, ``canonical_url``) are derived
    automatically in ``__post_init__`` whenever a URL is present.
    """

    # --- Identification ---
    id: str = field(default_factory=lambda: str(uuid.uuid4()))

    # --- Source ---
    source: AlertSource = AlertSource.GOOGLE_ALERTS_RSS
    topic_label: str = ""  # e.g. "Schulrecht Bayern"
    feed_url: Optional[str] = None

    # --- Content ---
    title: str = ""
    url: str = ""
    snippet: str = ""
    article_text: Optional[str] = None

    # --- Metadata ---
    lang: str = "de"
    published_at: Optional[datetime] = None
    # NOTE(review): datetime.utcnow() is naive and deprecated since Python
    # 3.12; kept for backward compatibility with already-stored records.
    fetched_at: datetime = field(default_factory=datetime.utcnow)

    # --- Deduplication ---
    canonical_url: Optional[str] = None
    url_hash: Optional[str] = None
    content_hash: Optional[str] = None  # SimHash for fuzzy matching

    # --- Processing ---
    status: AlertStatus = AlertStatus.NEW
    cluster_id: Optional[str] = None

    # --- Relevance (set after scoring) ---
    relevance_score: Optional[float] = None  # 0.0 - 1.0
    relevance_decision: Optional[str] = None  # KEEP, DROP, REVIEW
    relevance_reasons: list = field(default_factory=list)
    relevance_summary: Optional[str] = None

    def __post_init__(self):
        """Derive URL hash and canonical URL after initialisation."""
        if not self.url_hash and self.url:
            self.url_hash = self._compute_url_hash()
        if not self.canonical_url and self.url:
            self.canonical_url = self._normalize_url(self.url)

    def _compute_url_hash(self) -> str:
        """Return a truncated (16 hex chars) SHA-256 of the normalised URL."""
        normalized = self._normalize_url(self.url)
        return hashlib.sha256(normalized.encode()).hexdigest()[:16]

    def _normalize_url(self, url: str) -> str:
        """Normalise *url* for deduplication.

        Lower-cases the host and strips common tracking query parameters,
        a trailing slash, and the fragment.
        """
        import urllib.parse
        parsed = urllib.parse.urlparse(url)
        # news.google.com article URLs embed the real URL base64-encoded;
        # proper resolution is more involved, so they pass through as-is.
        if "news.google.com" in parsed.netloc and "/articles/" in parsed.path:
            pass
        # Strip well-known tracking parameters (case-insensitive).
        tracking_params = {
            "utm_source", "utm_medium", "utm_campaign", "utm_content", "utm_term",
            "fbclid", "gclid", "ref", "source"
        }
        query_params = urllib.parse.parse_qs(parsed.query)
        cleaned_params = {k: v for k, v in query_params.items()
                          if k.lower() not in tracking_params}
        cleaned_query = urllib.parse.urlencode(cleaned_params, doseq=True)
        # Rebuild the URL without the fragment.
        normalized = urllib.parse.urlunparse((
            parsed.scheme,
            parsed.netloc.lower(),
            parsed.path.rstrip("/"),
            parsed.params,
            cleaned_query,
            ""  # no fragment
        ))
        return normalized

    def compute_content_hash(self, text: Optional[str] = None) -> str:
        """Compute and store a SimHash of the content for fuzzy matching.

        SimHash detects near-duplicate texts even when they differ slightly
        (e.g. different sources covering the same story). Falls back from
        *text* to article text, then snippet, then title.
        """
        from ..processing.dedup import compute_simhash
        content = text or self.article_text or self.snippet or self.title
        if content:
            self.content_hash = compute_simhash(content)
        return self.content_hash or ""

    def to_dict(self) -> dict:
        """Serialise to a plain dictionary for JSON or DB storage."""
        return {
            "id": self.id,
            "source": self.source.value,
            "topic_label": self.topic_label,
            "feed_url": self.feed_url,
            "title": self.title,
            "url": self.url,
            "snippet": self.snippet,
            "article_text": self.article_text,
            "lang": self.lang,
            "published_at": self.published_at.isoformat() if self.published_at else None,
            "fetched_at": self.fetched_at.isoformat() if self.fetched_at else None,
            "canonical_url": self.canonical_url,
            "url_hash": self.url_hash,
            "content_hash": self.content_hash,
            "status": self.status.value,
            "cluster_id": self.cluster_id,
            "relevance_score": self.relevance_score,
            "relevance_decision": self.relevance_decision,
            "relevance_reasons": self.relevance_reasons,
            "relevance_summary": self.relevance_summary,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "AlertItem":
        """Create an AlertItem from a dictionary (inverse of ``to_dict``).

        The input mapping is NOT modified, and unknown keys are ignored so
        that rows with extra columns can still be loaded.
        """
        data = dict(data)  # work on a copy -- never mutate the caller's dict
        # Parse enums
        if "source" in data and isinstance(data["source"], str):
            data["source"] = AlertSource(data["source"])
        if "status" in data and isinstance(data["status"], str):
            data["status"] = AlertStatus(data["status"])
        # Parse timestamps
        for field_name in ["published_at", "fetched_at"]:
            if field_name in data and isinstance(data[field_name], str):
                data[field_name] = datetime.fromisoformat(data[field_name])
        # Drop keys that are not declared dataclass fields.
        known = cls.__dataclass_fields__.keys()
        return cls(**{k: v for k, v in data.items() if k in known})

    def __repr__(self) -> str:
        return f"AlertItem(id={self.id[:8]}, title='{self.title[:50]}...', status={self.status.value})"

View File

@@ -0,0 +1,288 @@
"""
RelevanceProfile Model.
Definiert das Relevanzprofil eines Nutzers für die Alerts-Filterung.
Lernt über Zeit durch Feedback.
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
import uuid
@dataclass
class PriorityItem:
    """A single priority topic within a relevance profile."""
    label: str  # e.g. "Inklusion", "Datenschutz Schule"
    weight: float = 0.5  # 0.0 - 1.0, higher means more important
    keywords: list = field(default_factory=list)  # extra matching keywords
    description: Optional[str] = None  # context hint for the LLM

    def to_dict(self) -> dict:
        """Serialise this priority as a plain dictionary."""
        keys = ("label", "weight", "keywords", "description")
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, data: dict) -> "PriorityItem":
        """Rebuild a PriorityItem from its dictionary form."""
        return cls(**data)
@dataclass
class RelevanceProfile:
    """User profile for relevance scoring of alerts.

    The profile is used to check alerts for relevance. It contains:
    - Priorities: topics of interest (with weighting)
    - Exclusions: topics to be ignored
    - Positive examples: URLs/titles that were relevant
    - Negative examples: URLs/titles that were irrelevant
    - Policies: additional rules (e.g. German sources only)
    """

    # --- Identification ---
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    user_id: Optional[str] = None  # set when the profile is user-specific

    # --- Relevance criteria ---
    priorities: list = field(default_factory=list)  # List[PriorityItem]
    exclusions: list = field(default_factory=list)  # keywords that exclude alerts

    # --- Few-shot examples ---
    positive_examples: list = field(default_factory=list)  # relevant alerts
    negative_examples: list = field(default_factory=list)  # irrelevant alerts

    # --- Policies ---
    policies: dict = field(default_factory=dict)

    # --- Metadata ---
    # NOTE(review): datetime.utcnow() is naive and deprecated since Python
    # 3.12; kept for compatibility with already-stored profiles.
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)

    # --- Statistics ---
    total_scored: int = 0
    total_kept: int = 0
    total_dropped: int = 0
    accuracy_estimate: Optional[float] = None  # estimated scoring accuracy

    def add_priority(self, label: str, weight: float = 0.5, **kwargs) -> None:
        """Add a priority topic (extra kwargs are forwarded to PriorityItem)."""
        self.priorities.append(PriorityItem(
            label=label,
            weight=weight,
            **kwargs
        ))
        self.updated_at = datetime.utcnow()

    def add_exclusion(self, keyword: str) -> None:
        """Add an exclusion keyword; duplicates are ignored."""
        if keyword not in self.exclusions:
            self.exclusions.append(keyword)
            self.updated_at = datetime.utcnow()

    def add_positive_example(self, title: str, url: str, reason: str = "") -> None:
        """Record a positive (relevant) example for few-shot learning."""
        self.positive_examples.append({
            "title": title,
            "url": url,
            "reason": reason,
            "added_at": datetime.utcnow().isoformat(),
        })
        # Keep only the 20 most recent examples.
        self.positive_examples = self.positive_examples[-20:]
        self.updated_at = datetime.utcnow()

    def add_negative_example(self, title: str, url: str, reason: str = "") -> None:
        """Record a negative (irrelevant) example for few-shot learning."""
        self.negative_examples.append({
            "title": title,
            "url": url,
            "reason": reason,
            "added_at": datetime.utcnow().isoformat(),
        })
        # Keep only the 20 most recent examples.
        self.negative_examples = self.negative_examples[-20:]
        self.updated_at = datetime.utcnow()

    def update_from_feedback(self, alert_title: str, alert_url: str,
                             is_relevant: bool, reason: str = "") -> None:
        """Update the profile based on explicit user feedback.

        Args:
            alert_title: Title of the alert.
            alert_url: URL of the alert.
            is_relevant: True if the user marked the alert as relevant.
            reason: Optional reason for the decision.
        """
        if is_relevant:
            self.add_positive_example(alert_title, alert_url, reason)
            self.total_kept += 1
        else:
            self.add_negative_example(alert_title, alert_url, reason)
            self.total_dropped += 1
        self.total_scored += 1
        # Placeholder: a real accuracy estimate would compare earlier
        # predictions against this feedback once enough data exists.
        if self.total_scored > 10:
            pass

    def get_prompt_context(self) -> str:
        """Render the profile as Markdown for the relevance scorer's system prompt."""
        lines = ["## Relevanzprofil des Nutzers\n"]
        # Priorities
        if self.priorities:
            lines.append("### Prioritäten (Themen von Interesse):")
            for p in self.priorities:
                if isinstance(p, dict):  # tolerate not-yet-parsed dict entries
                    p = PriorityItem.from_dict(p)
                weight_label = "Sehr wichtig" if p.weight > 0.7 else "Wichtig" if p.weight > 0.4 else "Interessant"
                lines.append(f"- **{p.label}** ({weight_label})")
                if p.description:
                    lines.append(f"  {p.description}")
                if p.keywords:
                    lines.append(f"  Keywords: {', '.join(p.keywords)}")
            lines.append("")
        # Exclusions
        if self.exclusions:
            lines.append("### Ausschlüsse (ignorieren):")
            lines.append(f"Themen mit diesen Keywords: {', '.join(self.exclusions)}")
            lines.append("")
        # Positive examples (most recent 5)
        if self.positive_examples:
            lines.append("### Beispiele für relevante Alerts:")
            for ex in self.positive_examples[-5:]:
                lines.append(f"- \"{ex['title']}\"")
                if ex.get("reason"):
                    lines.append(f"  Grund: {ex['reason']}")
            lines.append("")
        # Negative examples (most recent 5)
        if self.negative_examples:
            lines.append("### Beispiele für irrelevante Alerts:")
            for ex in self.negative_examples[-5:]:
                lines.append(f"- \"{ex['title']}\"")
                if ex.get("reason"):
                    lines.append(f"  Grund: {ex['reason']}")
            lines.append("")
        # Policies
        if self.policies:
            lines.append("### Zusätzliche Regeln:")
            for key, value in self.policies.items():
                lines.append(f"- {key}: {value}")
        return "\n".join(lines)

    def to_dict(self) -> dict:
        """Serialise to a plain dictionary (inverse of ``from_dict``)."""
        return {
            "id": self.id,
            "user_id": self.user_id,
            "priorities": [p.to_dict() if isinstance(p, PriorityItem) else p
                           for p in self.priorities],
            "exclusions": self.exclusions,
            "positive_examples": self.positive_examples,
            "negative_examples": self.negative_examples,
            "policies": self.policies,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "total_scored": self.total_scored,
            "total_kept": self.total_kept,
            "total_dropped": self.total_dropped,
            "accuracy_estimate": self.accuracy_estimate,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "RelevanceProfile":
        """Create a RelevanceProfile from a dictionary.

        The input mapping is NOT modified, and unknown keys are ignored so
        that rows with extra columns can still be loaded.
        """
        data = dict(data)  # work on a copy -- never mutate the caller's dict
        # Parse timestamps
        for field_name in ["created_at", "updated_at"]:
            if field_name in data and isinstance(data[field_name], str):
                data[field_name] = datetime.fromisoformat(data[field_name])
        # Parse priorities
        if "priorities" in data:
            data["priorities"] = [
                PriorityItem.from_dict(p) if isinstance(p, dict) else p
                for p in data["priorities"]
            ]
        # Drop keys that are not declared dataclass fields.
        known = cls.__dataclass_fields__.keys()
        return cls(**{k: v for k, v in data.items() if k in known})

    @classmethod
    def create_default_education_profile(cls) -> "RelevanceProfile":
        """Build a default profile for education topics.

        Optimised for teachers / school staff.
        """
        profile = cls()
        # Education-related priorities
        profile.add_priority(
            "Inklusion",
            weight=0.9,
            keywords=["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
            description="Inklusive Bildung, Förderschulen, Nachteilsausgleich"
        )
        profile.add_priority(
            "Datenschutz Schule",
            weight=0.85,
            keywords=["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
            description="DSGVO in Schulen, Datenschutz bei Klassenfotos"
        )
        profile.add_priority(
            "Schulrecht Bayern",
            weight=0.8,
            keywords=["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
            description="Bayerisches Schulrecht, Verordnungen"
        )
        profile.add_priority(
            "Digitalisierung Schule",
            weight=0.7,
            keywords=["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
            description="Digitale Medien im Unterricht"
        )
        profile.add_priority(
            "Elternarbeit",
            weight=0.6,
            keywords=["Elternbeirat", "Elternabend", "Kommunikation"],
            description="Zusammenarbeit mit Eltern"
        )
        # Default exclusions
        profile.exclusions = [
            "Stellenanzeige",
            "Praktikum gesucht",
            "Werbung",
            "Pressemitteilung",  # often generic
        ]
        # Policies
        profile.policies = {
            "prefer_german_sources": True,
            "max_age_days": 30,  # ignore older alerts
            "min_content_length": 100,  # ignore very short snippets
        }
        return profile

    def __repr__(self) -> str:
        return f"RelevanceProfile(id={self.id[:8]}, priorities={len(self.priorities)}, examples={len(self.positive_examples) + len(self.negative_examples)})"

View File

@@ -0,0 +1,31 @@
"""
Pydantic Models für OpenAI-kompatible API.
"""
from .chat import (
ChatMessage,
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionChunk,
ChatChoice,
ChatChoiceDelta,
Usage,
ToolCall,
FunctionCall,
Tool,
ToolFunction,
)
__all__ = [
"ChatMessage",
"ChatCompletionRequest",
"ChatCompletionResponse",
"ChatCompletionChunk",
"ChatChoice",
"ChatChoiceDelta",
"Usage",
"ToolCall",
"FunctionCall",
"Tool",
"ToolFunction",
]

View File

@@ -0,0 +1,135 @@
"""
OpenAI-kompatible Chat Completion Models.
Basiert auf OpenAI API Spezifikation:
https://platform.openai.com/docs/api-reference/chat/create
"""
from __future__ import annotations
from typing import Optional, Literal, Any, Union, List, Dict
from pydantic import BaseModel, Field
import time
import uuid
class FunctionCall(BaseModel):
    """A function invocation carried inside a tool call."""
    name: str
    arguments: str  # JSON-encoded argument object, per the OpenAI spec
class ToolCall(BaseModel):
    """A tool call emitted by the model."""
    id: str = Field(default_factory=lambda: f"call_{uuid.uuid4().hex[:12]}")
    type: Literal["function"] = "function"  # only function tools are supported
    function: FunctionCall
class ChatMessage(BaseModel):
    """A single message in a chat conversation."""
    role: Literal["system", "user", "assistant", "tool"]
    content: Optional[str] = None  # may be None, e.g. when only tool_calls are present
    name: Optional[str] = None  # optional author name
    tool_call_id: Optional[str] = None  # presumably set on role="tool" replies (OpenAI convention)
    tool_calls: Optional[list[ToolCall]] = None  # tool calls requested by the assistant
class ToolFunction(BaseModel):
    """Definition of a callable tool function."""
    name: str
    description: Optional[str] = None
    parameters: dict[str, Any] = Field(default_factory=dict)  # JSON Schema of the arguments
class Tool(BaseModel):
    """Tool definition for function calling."""
    type: Literal["function"] = "function"
    function: ToolFunction
class RequestMetadata(BaseModel):
    """Additional, non-OpenAI metadata attached to a request."""
    playbook_id: Optional[str] = None
    tenant_id: Optional[str] = None
    user_id: Optional[str] = None
class ChatCompletionRequest(BaseModel):
    """Request body for chat completions (OpenAI-compatible)."""
    model: str
    messages: list[ChatMessage]
    stream: bool = False  # True => streamed response of ChatCompletionChunk
    temperature: Optional[float] = Field(default=0.7, ge=0, le=2)
    top_p: Optional[float] = Field(default=1.0, ge=0, le=1)
    max_tokens: Optional[int] = Field(default=None, ge=1)  # None = backend default
    stop: Optional[Union[List[str], str]] = None  # stop sequence(s)
    presence_penalty: Optional[float] = Field(default=0, ge=-2, le=2)
    frequency_penalty: Optional[float] = Field(default=0, ge=-2, le=2)
    user: Optional[str] = None  # opaque end-user identifier
    tools: Optional[list[Tool]] = None
    tool_choice: Optional[Union[str, Dict[str, Any]]] = None  # e.g. "auto" or a specific tool object
    metadata: Optional[RequestMetadata] = None  # breakpilot-specific extension
class ChatChoice(BaseModel):
    """One completion choice in a non-streaming response."""
    index: int = 0
    message: ChatMessage
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter"]] = None
class ChatChoiceDelta(BaseModel):
    """Incremental delta of a message within a streaming response."""
    role: Optional[str] = None
    content: Optional[str] = None
    tool_calls: Optional[list[ToolCall]] = None
class StreamChoice(BaseModel):
    """One choice inside a streaming chunk."""
    index: int = 0
    delta: ChatChoiceDelta
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter"]] = None
class Usage(BaseModel):
    """Token usage statistics for a completion."""
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
class ChatCompletionResponse(BaseModel):
    """Response body for chat completions (non-streaming)."""
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: Literal["chat.completion"] = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))  # Unix timestamp
    model: str
    choices: list[ChatChoice]
    usage: Optional[Usage] = None
class ChatCompletionChunk(BaseModel):
    """One chunk of a streaming chat-completion response."""
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))  # Unix timestamp
    model: str
    choices: list[StreamChoice]
# Model Info
class ModelInfo(BaseModel):
    """Information about one available model."""
    id: str
    object: Literal["model"] = "model"
    created: int = Field(default_factory=lambda: int(time.time()))  # Unix timestamp
    owned_by: str = "breakpilot"
    description: Optional[str] = None
    context_length: int = 8192  # default context window size
class ModelListResponse(BaseModel):
    """Response body for the /v1/models endpoint."""
    object: Literal["list"] = "list"
    data: list[ModelInfo]

View File

@@ -0,0 +1,155 @@
-- ============================================================================
-- Abitur Documents Migration
-- ============================================================================
-- Creates tables for storing Abitur documents (NiBiS, etc.) persistently.
-- Run with: psql -h localhost -U breakpilot -d breakpilot_dev -f add_abitur_docs_tables.sql
--
-- Tables created:
-- 1. abitur_dokumente - Main document metadata
-- 2. abitur_dokumente_chunks - Text chunks for RAG (optional, Qdrant primary)
-- ============================================================================
-- ============================================================================
-- ENUMS
-- ============================================================================
-- German federal states a document can originate from.
-- Each CREATE TYPE is wrapped in a DO block that swallows duplicate_object,
-- making the migration idempotent (re-runnable).
DO $$ BEGIN
    CREATE TYPE bundesland_enum AS ENUM (
        'niedersachsen', 'bayern', 'baden_wuerttemberg', 'nordrhein_westfalen',
        'hessen', 'sachsen', 'thueringen', 'berlin', 'hamburg',
        'schleswig_holstein', 'bremen', 'brandenburg', 'mecklenburg_vorpommern',
        'sachsen_anhalt', 'rheinland_pfalz', 'saarland'
    );
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;

-- Abitur exam subjects.
DO $$ BEGIN
    CREATE TYPE abitur_fach_enum AS ENUM (
        'deutsch', 'englisch', 'mathematik', 'biologie', 'chemie', 'physik',
        'geschichte', 'erdkunde', 'politik_wirtschaft', 'franzoesisch', 'spanisch',
        'latein', 'griechisch', 'kunst', 'musik', 'sport', 'informatik',
        'ev_religion', 'kath_religion', 'werte_normen', 'brc', 'bvw',
        'ernaehrung', 'mechatronik', 'gesundheit_pflege', 'paedagogik_psychologie'
    );
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;

-- Course level: eA = erhöhtes Anforderungsniveau, gA = grundlegendes.
DO $$ BEGIN
    CREATE TYPE abitur_niveau_enum AS ENUM ('eA', 'gA');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;

-- Document kinds (task sheet, marking guide, cover sheet, material, ...).
DO $$ BEGIN
    CREATE TYPE abitur_dok_typ_enum AS ENUM (
        'aufgabe', 'erwartungshorizont', 'deckblatt', 'material',
        'hoerverstehen', 'sprachmittlung', 'bewertungsbogen'
    );
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;

-- Processing pipeline status of a document.
DO $$ BEGIN
    CREATE TYPE abitur_status_enum AS ENUM (
        'pending', 'processing', 'recognized', 'confirmed', 'indexed', 'error'
    );
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- ============================================================================
-- MAIN TABLE: abitur_dokumente
-- ============================================================================
CREATE TABLE IF NOT EXISTS abitur_dokumente (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- File info
    dateiname VARCHAR(500) NOT NULL,
    original_dateiname VARCHAR(500) NOT NULL,  -- name as originally imported
    file_path VARCHAR(1000),                   -- MinIO path or local path
    file_size BIGINT DEFAULT 0,
    file_hash VARCHAR(64),                     -- SHA-256 for deduplication
    -- Metadata
    bundesland bundesland_enum NOT NULL DEFAULT 'niedersachsen',
    fach abitur_fach_enum NOT NULL,
    jahr INTEGER NOT NULL CHECK (jahr >= 2000 AND jahr <= 2100),  -- sanity bounds on exam year
    niveau abitur_niveau_enum NOT NULL DEFAULT 'eA',
    typ abitur_dok_typ_enum NOT NULL DEFAULT 'aufgabe',
    aufgaben_nummer VARCHAR(20),               -- I, II, III, 1, 2, etc.
    variante VARCHAR(50),                      -- BG, Tech, Wirt, etc.
    -- Processing status
    status abitur_status_enum NOT NULL DEFAULT 'pending',
    confidence REAL DEFAULT 0.0,               -- recognition confidence; presumably 0.0-1.0 -- confirm
    -- Vector store integration
    indexed BOOLEAN DEFAULT FALSE,
    vector_ids TEXT[],                         -- Qdrant vector IDs
    qdrant_collection VARCHAR(100) DEFAULT 'bp_nibis_eh',
    -- Source tracking
    source_dir VARCHAR(500),                   -- original source directory
    import_batch_id UUID,                      -- groups rows of one batch import
    -- Timestamps
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),      -- maintained by trigger
    indexed_at TIMESTAMPTZ
);

-- Single-column indexes for common filter predicates
CREATE INDEX IF NOT EXISTS idx_abitur_dok_bundesland ON abitur_dokumente(bundesland);
CREATE INDEX IF NOT EXISTS idx_abitur_dok_fach ON abitur_dokumente(fach);
CREATE INDEX IF NOT EXISTS idx_abitur_dok_jahr ON abitur_dokumente(jahr);
CREATE INDEX IF NOT EXISTS idx_abitur_dok_niveau ON abitur_dokumente(niveau);
CREATE INDEX IF NOT EXISTS idx_abitur_dok_typ ON abitur_dokumente(typ);
CREATE INDEX IF NOT EXISTS idx_abitur_dok_status ON abitur_dokumente(status);
CREATE INDEX IF NOT EXISTS idx_abitur_dok_indexed ON abitur_dokumente(indexed);
CREATE INDEX IF NOT EXISTS idx_abitur_dok_file_hash ON abitur_dokumente(file_hash);

-- Composite index matching the typical search pattern
CREATE INDEX IF NOT EXISTS idx_abitur_dok_search
    ON abitur_dokumente(bundesland, fach, jahr, niveau);
-- ============================================================================
-- TRIGGER: Auto-update updated_at
-- ============================================================================
-- Keeps updated_at current: sets it to NOW() on every row update.
CREATE OR REPLACE FUNCTION update_abitur_dok_timestamp()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- DROP + CREATE keeps the trigger definition idempotent across re-runs.
DROP TRIGGER IF EXISTS trigger_abitur_dok_updated ON abitur_dokumente;
CREATE TRIGGER trigger_abitur_dok_updated
    BEFORE UPDATE ON abitur_dokumente
    FOR EACH ROW
    EXECUTE FUNCTION update_abitur_dok_timestamp();
-- ============================================================================
-- HELPER VIEWS
-- ============================================================================
-- Per (bundesland, fach, jahr) document counts, with breakdowns for
-- indexed documents, task sheets and marking guides.
CREATE OR REPLACE VIEW v_abitur_dok_stats AS
SELECT
    bundesland,
    fach,
    jahr,
    COUNT(*) as total,
    COUNT(*) FILTER (WHERE indexed = TRUE) as indexed_count,
    COUNT(*) FILTER (WHERE typ = 'aufgabe') as aufgaben_count,
    COUNT(*) FILTER (WHERE typ = 'erwartungshorizont') as ewh_count
FROM abitur_dokumente
GROUP BY bundesland, fach, jahr
ORDER BY jahr DESC, fach;

-- ============================================================================
-- SAMPLE DATA CHECK
-- ============================================================================
-- Show table structure
-- \d abitur_dokumente

-- Emits one confirmation row so psql output shows the migration ran.
SELECT 'Migration completed: abitur_dokumente table created' AS status;

View File

@@ -0,0 +1,293 @@
-- Migration: Add Multi-Agent Architecture Tables
-- Date: 2025-01-15
-- Description: Creates tables for agent sessions, memory store, and message audit
-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- ============================================================================
-- 1. Agent Sessions Table
-- ============================================================================
-- Stores agent session data with state, context, and checkpoints
CREATE TABLE IF NOT EXISTS agent_sessions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    agent_type VARCHAR(50) NOT NULL,
    -- SET NULL keeps the session row for auditing when a user is deleted.
    user_id UUID REFERENCES users(id) ON DELETE SET NULL,
    state VARCHAR(20) NOT NULL DEFAULT 'active'
        CHECK (state IN ('active', 'paused', 'completed', 'failed', 'deleted')),
    context JSONB DEFAULT '{}'::jsonb,
    checkpoints JSONB DEFAULT '[]'::jsonb,
    metadata JSONB DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),  -- maintained by trigger
    last_heartbeat TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_agent_sessions_user
    ON agent_sessions(user_id);
-- Partial index: only active sessions are queried by state.
CREATE INDEX IF NOT EXISTS idx_agent_sessions_state
    ON agent_sessions(state) WHERE state = 'active';
CREATE INDEX IF NOT EXISTS idx_agent_sessions_agent_type
    ON agent_sessions(agent_type);
CREATE INDEX IF NOT EXISTS idx_agent_sessions_heartbeat
    ON agent_sessions(last_heartbeat);
CREATE INDEX IF NOT EXISTS idx_agent_sessions_created
    ON agent_sessions(created_at DESC);

-- GIN index for JSONB containment queries on the session context.
CREATE INDEX IF NOT EXISTS idx_agent_sessions_context
    ON agent_sessions USING GIN (context jsonb_path_ops);

-- Comments for documentation
COMMENT ON TABLE agent_sessions IS 'Stores agent session state and checkpoints for recovery';
COMMENT ON COLUMN agent_sessions.agent_type IS 'Type: tutor-agent, grader-agent, quality-judge, alert-agent, orchestrator';
COMMENT ON COLUMN agent_sessions.state IS 'Session state: active, paused, completed, failed, deleted';
COMMENT ON COLUMN agent_sessions.context IS 'Session context data (entities, conversation state)';
COMMENT ON COLUMN agent_sessions.checkpoints IS 'Recovery checkpoints as JSON array';
COMMENT ON COLUMN agent_sessions.last_heartbeat IS 'Last heartbeat timestamp for liveness detection';
-- ============================================================================
-- 2. Agent Memory Table
-- ============================================================================
-- Long-term memory store for agents with TTL support
CREATE TABLE IF NOT EXISTS agent_memory (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    namespace VARCHAR(100) NOT NULL DEFAULT 'breakpilot',
    key VARCHAR(500) NOT NULL,
    value JSONB NOT NULL,
    agent_id VARCHAR(50) NOT NULL,
    access_count INTEGER DEFAULT 0,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),  -- maintained by trigger
    last_accessed TIMESTAMPTZ,
    expires_at TIMESTAMPTZ,                -- NULL = never expires
    metadata JSONB DEFAULT '{}'::jsonb,
    -- One value per (namespace, key): enables upsert semantics.
    CONSTRAINT agent_memory_namespace_key_unique UNIQUE (namespace, key)
);

-- Indexes for efficient queries
CREATE INDEX IF NOT EXISTS idx_agent_memory_namespace
    ON agent_memory(namespace);
CREATE INDEX IF NOT EXISTS idx_agent_memory_agent
    ON agent_memory(agent_id);
-- Partial index: the TTL cleanup only scans rows that can expire.
CREATE INDEX IF NOT EXISTS idx_agent_memory_expires
    ON agent_memory(expires_at) WHERE expires_at IS NOT NULL;
-- varchar_pattern_ops enables index use for LIKE 'prefix%' key lookups.
CREATE INDEX IF NOT EXISTS idx_agent_memory_key_pattern
    ON agent_memory(key varchar_pattern_ops);
CREATE INDEX IF NOT EXISTS idx_agent_memory_access_count
    ON agent_memory(access_count DESC);

-- GIN index for JSONB containment queries on stored values.
CREATE INDEX IF NOT EXISTS idx_agent_memory_value
    ON agent_memory USING GIN (value jsonb_path_ops);

-- Comments
COMMENT ON TABLE agent_memory IS 'Long-term memory store for agents with TTL';
COMMENT ON COLUMN agent_memory.namespace IS 'Namespace for isolation (default: breakpilot)';
COMMENT ON COLUMN agent_memory.key IS 'Memory key (e.g., evaluation:math:student123)';
COMMENT ON COLUMN agent_memory.value IS 'Stored value as JSONB';
COMMENT ON COLUMN agent_memory.access_count IS 'Number of times this memory was accessed';
COMMENT ON COLUMN agent_memory.expires_at IS 'When this memory expires (NULL = never)';
-- ============================================================================
-- 3. Agent Messages Table (Audit Trail)
-- ============================================================================
-- Stores all inter-agent messages for audit and debugging
CREATE TABLE IF NOT EXISTS agent_messages (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    sender VARCHAR(50) NOT NULL,
    receiver VARCHAR(50) NOT NULL,
    message_type VARCHAR(50) NOT NULL,
    payload JSONB NOT NULL,
    priority INTEGER DEFAULT 1 CHECK (priority BETWEEN 0 AND 3),
    correlation_id UUID,            -- links request/response pairs
    reply_to VARCHAR(50),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    -- Partition hint for future partitioning.
    -- FIX: a generated column expression must be IMMUTABLE. DATE(timestamptz)
    -- depends on the session TimeZone (it is only STABLE), so the original
    -- DATE(created_at) made this CREATE TABLE fail with "generation
    -- expression is not immutable". Converting to UTC first makes the
    -- expression immutable and the stored date deterministic.
    created_date DATE GENERATED ALWAYS AS ((created_at AT TIME ZONE 'UTC')::date) STORED
);

-- Indexes for message queries
-- Partial index: correlation lookups never match NULL ids.
CREATE INDEX IF NOT EXISTS idx_agent_messages_correlation
    ON agent_messages(correlation_id) WHERE correlation_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_agent_messages_sender
    ON agent_messages(sender);
CREATE INDEX IF NOT EXISTS idx_agent_messages_receiver
    ON agent_messages(receiver);
CREATE INDEX IF NOT EXISTS idx_agent_messages_type
    ON agent_messages(message_type);
CREATE INDEX IF NOT EXISTS idx_agent_messages_created
    ON agent_messages(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_agent_messages_date
    ON agent_messages(created_date);

-- Comments
COMMENT ON TABLE agent_messages IS 'Audit trail for inter-agent communication';
COMMENT ON COLUMN agent_messages.priority IS '0=LOW, 1=NORMAL, 2=HIGH, 3=CRITICAL';
COMMENT ON COLUMN agent_messages.correlation_id IS 'Links request/response pairs';
COMMENT ON COLUMN agent_messages.created_date IS 'Partition column for future table partitioning';
-- ============================================================================
-- 4. Helper Functions
-- ============================================================================
-- Function to clean up expired memories
-- Deletes all memory rows whose TTL has elapsed; returns the delete count
-- (intended for periodic invocation, e.g. by a scheduler).
CREATE OR REPLACE FUNCTION cleanup_expired_agent_memory()
RETURNS INTEGER AS $$
DECLARE
    deleted_count INTEGER;
BEGIN
    DELETE FROM agent_memory
    WHERE expires_at IS NOT NULL AND expires_at < NOW();
    GET DIAGNOSTICS deleted_count = ROW_COUNT;
    RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION cleanup_expired_agent_memory() IS 'Removes expired memory entries, returns count';
-- Function to clean up stale sessions
-- Marks active sessions whose heartbeat is older than max_age_hours as
-- 'failed', recording the reason in the session context. Returns the count.
CREATE OR REPLACE FUNCTION cleanup_stale_agent_sessions(max_age_hours INTEGER DEFAULT 48)
RETURNS INTEGER AS $$
DECLARE
    updated_count INTEGER;
BEGIN
    UPDATE agent_sessions
    SET state = 'failed',
        updated_at = NOW(),
        context = context || '{"failure_reason": "heartbeat_timeout"}'::jsonb
    WHERE state = 'active'
      -- int || text builds an interval literal such as '48 hours'.
      AND last_heartbeat < NOW() - (max_age_hours || ' hours')::INTERVAL;
    GET DIAGNOSTICS updated_count = ROW_COUNT;
    RETURN updated_count;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION cleanup_stale_agent_sessions(INTEGER) IS 'Marks stale sessions as failed, returns count';
-- Function to update session heartbeat
-- Refreshes the heartbeat of an active session; returns TRUE when a
-- matching active session row was updated, FALSE otherwise.
CREATE OR REPLACE FUNCTION update_session_heartbeat(session_uuid UUID)
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE agent_sessions
    SET last_heartbeat = NOW(), updated_at = NOW()
    WHERE id = session_uuid AND state = 'active';
    RETURN FOUND;  -- FOUND reflects whether the UPDATE touched a row
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION update_session_heartbeat(UUID) IS 'Updates session heartbeat, returns true if found';
-- ============================================================================
-- 5. Triggers
-- ============================================================================
-- Auto-update updated_at on agent_sessions
-- Shared trigger function: stamps updated_at with NOW() on every update.
CREATE OR REPLACE FUNCTION trigger_set_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- DROP + CREATE keeps the trigger definitions idempotent across re-runs.
DROP TRIGGER IF EXISTS agent_sessions_updated_at ON agent_sessions;
CREATE TRIGGER agent_sessions_updated_at
    BEFORE UPDATE ON agent_sessions
    FOR EACH ROW
    EXECUTE FUNCTION trigger_set_updated_at();

DROP TRIGGER IF EXISTS agent_memory_updated_at ON agent_memory;
CREATE TRIGGER agent_memory_updated_at
    BEFORE UPDATE ON agent_memory
    FOR EACH ROW
    EXECUTE FUNCTION trigger_set_updated_at();
-- ============================================================================
-- 6. DSGVO Compliance: Audit Views
-- ============================================================================
-- View for session audit without PII
-- Privacy-safe session view: user ids are hashed (requires pgcrypto for
-- digest()) and only aggregate context fields are exposed.
CREATE OR REPLACE VIEW v_agent_sessions_audit AS
SELECT
    id,
    agent_type,
    -- Hash user_id for privacy
    CASE
        WHEN user_id IS NOT NULL
        THEN encode(digest(user_id::text, 'sha256'), 'hex')
        ELSE NULL
    END AS user_id_hash,
    state,
    -- Only expose non-sensitive context keys
    jsonb_build_object(
        'message_count', COALESCE((context->>'message_count')::int, 0),
        'intent_count', COALESCE(jsonb_array_length(context->'intent_history'), 0)
    ) AS context_summary,
    jsonb_array_length(checkpoints) AS checkpoint_count,
    created_at,
    updated_at,
    last_heartbeat,
    EXTRACT(EPOCH FROM (updated_at - created_at)) AS session_duration_seconds
FROM agent_sessions;
COMMENT ON VIEW v_agent_sessions_audit IS 'Privacy-safe view of agent sessions for auditing';

-- Daily message volume per (sender, receiver, message_type) pair.
CREATE OR REPLACE VIEW v_agent_messages_daily_stats AS
SELECT
    created_date,
    sender,
    receiver,
    message_type,
    COUNT(*) AS message_count,
    AVG(priority) AS avg_priority
FROM agent_messages
GROUP BY created_date, sender, receiver, message_type;
COMMENT ON VIEW v_agent_messages_daily_stats IS 'Daily statistics for inter-agent messages';
-- ============================================================================
-- 7. Sample Data for Testing (Optional - Comment out in production)
-- ============================================================================
/*
-- Uncomment to insert sample data for testing
INSERT INTO agent_sessions (agent_type, state, context)
VALUES
('tutor-agent', 'active', '{"subject": "math", "grade": 10}'::jsonb),
('grader-agent', 'active', '{"exam_type": "vorabitur"}'::jsonb);
INSERT INTO agent_memory (namespace, key, value, agent_id, expires_at)
VALUES
('breakpilot', 'test:memory:1', '{"test": true}'::jsonb, 'tutor-agent', NOW() + INTERVAL '30 days');
*/
-- ============================================================================
-- 8. Grants (Adjust based on your user/role setup)
-- ============================================================================
-- Uncomment and adjust for your environment
/*
GRANT SELECT, INSERT, UPDATE ON agent_sessions TO breakpilot_app;
GRANT SELECT, INSERT, UPDATE, DELETE ON agent_memory TO breakpilot_app;
GRANT SELECT, INSERT ON agent_messages TO breakpilot_app;
GRANT EXECUTE ON FUNCTION cleanup_expired_agent_memory() TO breakpilot_app;
GRANT EXECUTE ON FUNCTION cleanup_stale_agent_sessions(INTEGER) TO breakpilot_app;
GRANT EXECUTE ON FUNCTION update_session_heartbeat(UUID) TO breakpilot_app;
*/
-- ============================================================================
-- Migration Complete
-- ============================================================================
-- To verify migration:
-- \dt agent_*
-- \df cleanup_*
-- \dv v_agent_*

View File

@@ -0,0 +1,807 @@
-- ============================================================================
-- Compliance & Audit Framework Migration
-- ============================================================================
-- This migration creates all tables required for the Compliance module.
-- Run with: psql -h localhost -U breakpilot -d breakpilot -f add_compliance_tables.sql
--
-- Tables created:
-- 1. Core Compliance Framework (7 tables)
-- 2. Service Module Registry (3 tables)
-- 3. Audit Sessions & Sign-Off (2 tables)
-- 4. ISO 27001 ISMS Models (11 tables)
-- ============================================================================
-- ============================================================================
-- ENUMS (PostgreSQL ENUM types)
-- ============================================================================
-- Each ENUM is created inside a DO block that swallows duplicate_object,
-- making the migration idempotent: re-running it leaves existing types
-- untouched (CREATE TYPE has no IF NOT EXISTS variant for enums).
-- Legal classification of a regulation/standard.
DO $$ BEGIN
CREATE TYPE regulation_type AS ENUM (
'eu_regulation', 'eu_directive', 'de_law', 'bsi_standard', 'industry_standard'
);
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- Classic control taxonomy: prevent / detect / correct.
DO $$ BEGIN
CREATE TYPE control_type AS ENUM ('preventive', 'detective', 'corrective');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- Short domain codes used as control_id prefixes (governance, privacy,
-- identity & access, cryptography, SDLC, operations, AI, CRA, audit).
DO $$ BEGIN
CREATE TYPE control_domain AS ENUM (
'gov', 'priv', 'iam', 'crypto', 'sdlc', 'ops', 'ai', 'cra', 'aud'
);
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- Current assessment result of a control ('n/a' = not applicable).
DO $$ BEGIN
CREATE TYPE control_status AS ENUM ('pass', 'partial', 'fail', 'n/a', 'planned');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
DO $$ BEGIN
CREATE TYPE risk_level AS ENUM ('low', 'medium', 'high', 'critical');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
DO $$ BEGIN
CREATE TYPE evidence_status AS ENUM ('valid', 'expired', 'pending', 'failed');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- Lifecycle of an audit export job.
DO $$ BEGIN
CREATE TYPE export_status AS ENUM ('pending', 'generating', 'completed', 'failed');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- Coarse category of a registered service module.
DO $$ BEGIN
CREATE TYPE service_type AS ENUM (
'backend', 'database', 'ai', 'communication', 'storage',
'infrastructure', 'monitoring', 'security'
);
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
DO $$ BEGIN
CREATE TYPE relevance_level AS ENUM ('critical', 'high', 'medium', 'low');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- Per-requirement outcome of an auditor sign-off.
DO $$ BEGIN
CREATE TYPE audit_result AS ENUM (
'compliant', 'compliant_notes', 'non_compliant', 'not_applicable', 'pending'
);
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
DO $$ BEGIN
CREATE TYPE audit_session_status AS ENUM ('draft', 'in_progress', 'completed', 'archived');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- Document approval workflow (scope, context, policies, ...).
DO $$ BEGIN
CREATE TYPE approval_status AS ENUM ('draft', 'under_review', 'approved', 'superseded');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- ISO-audit finding classification ('ofi' = opportunity for improvement).
DO $$ BEGIN
CREATE TYPE finding_type AS ENUM ('major', 'minor', 'ofi', 'positive');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- Finding remediation lifecycle, from open through CAPA to closure.
DO $$ BEGIN
CREATE TYPE finding_status AS ENUM (
'open', 'in_progress', 'capa_pending', 'verification_pending', 'verified', 'closed'
);
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
DO $$ BEGIN
CREATE TYPE capa_type AS ENUM ('corrective', 'preventive', 'both');
EXCEPTION WHEN duplicate_object THEN NULL;
END $$;
-- ============================================================================
-- CORE COMPLIANCE TABLES
-- ============================================================================
-- Table 1: compliance_regulations
-- Catalog of tracked regulations/standards. `code` is the short unique
-- handle; `regulation_type` classifies the legal source.
-- NOTE(review): VARCHAR(36) primary keys throughout this migration look
-- like application-generated UUID strings -- confirm against the ORM models.
CREATE TABLE IF NOT EXISTS compliance_regulations (
id VARCHAR(36) PRIMARY KEY,
code VARCHAR(20) UNIQUE NOT NULL,
name VARCHAR(200) NOT NULL,
full_name TEXT,
regulation_type regulation_type NOT NULL,
source_url VARCHAR(500),
local_pdf_path VARCHAR(500),
effective_date DATE,
description TEXT,
is_active BOOLEAN DEFAULT TRUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_regulations_code ON compliance_regulations(code);
-- Table 2: compliance_requirements
-- One row per article/paragraph-level requirement of a regulation,
-- carrying both the legal text and the project's interpretation,
-- implementation status, evidence pointers and audit state.
CREATE TABLE IF NOT EXISTS compliance_requirements (
id VARCHAR(36) PRIMARY KEY,
regulation_id VARCHAR(36) NOT NULL REFERENCES compliance_regulations(id),
article VARCHAR(50) NOT NULL,
paragraph VARCHAR(20),
requirement_id_external VARCHAR(50),
title VARCHAR(300) NOT NULL,
description TEXT,
requirement_text TEXT,
breakpilot_interpretation TEXT,
implementation_status VARCHAR(30) DEFAULT 'not_started',
implementation_details TEXT,
code_references JSONB,
documentation_links JSONB,
evidence_description TEXT,
evidence_artifacts JSONB,
auditor_notes TEXT,
audit_status VARCHAR(30) DEFAULT 'pending',
last_audit_date TIMESTAMP,
last_auditor VARCHAR(100),
is_applicable BOOLEAN DEFAULT TRUE,
applicability_reason TEXT,
priority INTEGER DEFAULT 2,
source_page INTEGER,
source_section VARCHAR(100),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_requirement_regulation_article ON compliance_requirements(regulation_id, article);
CREATE INDEX IF NOT EXISTS ix_requirement_audit_status ON compliance_requirements(audit_status);
CREATE INDEX IF NOT EXISTS ix_requirement_impl_status ON compliance_requirements(implementation_status);
-- Table 3: compliance_controls
-- Internal control library. `control_id` is the human-readable key
-- (domain-prefixed per the control_domain enum); controls may be
-- automated, carry review cadence and a pass/fail status.
CREATE TABLE IF NOT EXISTS compliance_controls (
id VARCHAR(36) PRIMARY KEY,
control_id VARCHAR(20) UNIQUE NOT NULL,
domain control_domain NOT NULL,
control_type control_type NOT NULL,
title VARCHAR(300) NOT NULL,
description TEXT,
pass_criteria TEXT NOT NULL,
implementation_guidance TEXT,
code_reference VARCHAR(500),
documentation_url VARCHAR(500),
is_automated BOOLEAN DEFAULT FALSE,
automation_tool VARCHAR(100),
automation_config JSONB,
status control_status DEFAULT 'planned',
status_notes TEXT,
owner VARCHAR(100),
review_frequency_days INTEGER DEFAULT 90,
last_reviewed_at TIMESTAMP,
next_review_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_control_id ON compliance_controls(control_id);
CREATE INDEX IF NOT EXISTS ix_control_domain_status ON compliance_controls(domain, status);
-- Table 4: compliance_control_mappings
-- Many-to-many join: which controls cover which requirements, with a
-- coverage level. The unique index enforces one mapping per pair.
CREATE TABLE IF NOT EXISTS compliance_control_mappings (
id VARCHAR(36) PRIMARY KEY,
requirement_id VARCHAR(36) NOT NULL REFERENCES compliance_requirements(id),
control_id VARCHAR(36) NOT NULL REFERENCES compliance_controls(id),
coverage_level VARCHAR(20) DEFAULT 'full',
notes TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE UNIQUE INDEX IF NOT EXISTS ix_mapping_req_ctrl ON compliance_control_mappings(requirement_id, control_id);
-- Table 5: compliance_evidence
-- Evidence artifacts attached to a control, with a validity window and
-- integrity hash (artifact_hash VARCHAR(64) -- presumably SHA-256 hex;
-- confirm against the uploader). CI-produced evidence records its job id.
CREATE TABLE IF NOT EXISTS compliance_evidence (
id VARCHAR(36) PRIMARY KEY,
control_id VARCHAR(36) NOT NULL REFERENCES compliance_controls(id),
evidence_type VARCHAR(50) NOT NULL,
title VARCHAR(300) NOT NULL,
description TEXT,
artifact_path VARCHAR(500),
artifact_url VARCHAR(500),
artifact_hash VARCHAR(64),
file_size_bytes INTEGER,
mime_type VARCHAR(100),
valid_from TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
valid_until TIMESTAMP,
status evidence_status DEFAULT 'valid',
source VARCHAR(100),
ci_job_id VARCHAR(100),
uploaded_by VARCHAR(100),
collected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_evidence_control_type ON compliance_evidence(control_id, evidence_type);
CREATE INDEX IF NOT EXISTS ix_evidence_status ON compliance_evidence(status);
-- Table 6: compliance_risks
-- Risk register: inherent likelihood/impact plus residual values after
-- mitigating controls (stored as JSONB list of control references).
CREATE TABLE IF NOT EXISTS compliance_risks (
id VARCHAR(36) PRIMARY KEY,
risk_id VARCHAR(20) UNIQUE NOT NULL,
title VARCHAR(300) NOT NULL,
description TEXT,
category VARCHAR(50) NOT NULL,
likelihood INTEGER NOT NULL,
impact INTEGER NOT NULL,
inherent_risk risk_level NOT NULL,
mitigating_controls JSONB,
residual_likelihood INTEGER,
residual_impact INTEGER,
residual_risk risk_level,
owner VARCHAR(100),
status VARCHAR(20) DEFAULT 'open',
treatment_plan TEXT,
identified_date DATE DEFAULT CURRENT_DATE,
review_date DATE,
last_assessed_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_risk_id ON compliance_risks(risk_id);
CREATE INDEX IF NOT EXISTS ix_risk_category_status ON compliance_risks(category, status);
CREATE INDEX IF NOT EXISTS ix_risk_inherent ON compliance_risks(inherent_risk);
-- Table 7: compliance_audit_exports
-- Tracks requested audit-report exports: filter parameters (JSONB),
-- generation status per export_status, produced file plus its hash,
-- and summary metrics of the exported snapshot.
CREATE TABLE IF NOT EXISTS compliance_audit_exports (
id VARCHAR(36) PRIMARY KEY,
export_type VARCHAR(50) NOT NULL,
export_name VARCHAR(200),
included_regulations JSONB,
included_domains JSONB,
date_range_start DATE,
date_range_end DATE,
requested_by VARCHAR(100) NOT NULL,
requested_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP,
file_path VARCHAR(500),
file_hash VARCHAR(64),
file_size_bytes INTEGER,
status export_status DEFAULT 'pending',
error_message TEXT,
total_controls INTEGER,
total_evidence INTEGER,
compliance_score FLOAT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- ============================================================================
-- SERVICE MODULE REGISTRY TABLES
-- ============================================================================
-- Table 8: compliance_service_modules
-- Registry of deployed services with compliance-relevant attributes:
-- data categories handled, PII/health-data/AI flags, criticality,
-- and a rolled-up compliance score.
CREATE TABLE IF NOT EXISTS compliance_service_modules (
id VARCHAR(36) PRIMARY KEY,
name VARCHAR(100) UNIQUE NOT NULL,
display_name VARCHAR(200) NOT NULL,
description TEXT,
service_type service_type NOT NULL,
port INTEGER,
technology_stack JSONB,
repository_path VARCHAR(500),
docker_image VARCHAR(200),
data_categories JSONB,
processes_pii BOOLEAN DEFAULT FALSE,
processes_health_data BOOLEAN DEFAULT FALSE,
ai_components BOOLEAN DEFAULT FALSE,
is_active BOOLEAN DEFAULT TRUE,
criticality VARCHAR(20) DEFAULT 'medium',
compliance_score FLOAT,
last_compliance_check TIMESTAMP,
owner_team VARCHAR(100),
owner_contact VARCHAR(200),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_module_name ON compliance_service_modules(name);
CREATE INDEX IF NOT EXISTS ix_module_type_active ON compliance_service_modules(service_type, is_active);
-- Table 9: compliance_module_regulations
-- Join table: which regulations apply to which module, at what relevance,
-- optionally narrowed to specific articles (JSONB list).
CREATE TABLE IF NOT EXISTS compliance_module_regulations (
id VARCHAR(36) PRIMARY KEY,
module_id VARCHAR(36) NOT NULL REFERENCES compliance_service_modules(id),
regulation_id VARCHAR(36) NOT NULL REFERENCES compliance_regulations(id),
relevance_level relevance_level DEFAULT 'medium',
notes TEXT,
applicable_articles JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE UNIQUE INDEX IF NOT EXISTS ix_module_regulation ON compliance_module_regulations(module_id, regulation_id);
-- Table 10: compliance_module_risks
-- Join table: module-specific re-assessment of a registered risk
-- (module-level likelihood/impact may differ from the register's).
CREATE TABLE IF NOT EXISTS compliance_module_risks (
id VARCHAR(36) PRIMARY KEY,
module_id VARCHAR(36) NOT NULL REFERENCES compliance_service_modules(id),
risk_id VARCHAR(36) NOT NULL REFERENCES compliance_risks(id),
module_likelihood INTEGER,
module_impact INTEGER,
module_risk_level risk_level,
assessment_notes TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE UNIQUE INDEX IF NOT EXISTS ix_module_risk ON compliance_module_risks(module_id, risk_id);
-- ============================================================================
-- AUDIT SESSION & SIGN-OFF TABLES
-- ============================================================================
-- Table 11: compliance_audit_sessions
-- One row per external/internal audit run. Progress counters
-- (total/completed/compliant/non_compliant) are denormalized here;
-- presumably maintained by the application when sign-offs change --
-- confirm in the service layer.
CREATE TABLE IF NOT EXISTS compliance_audit_sessions (
id VARCHAR(36) PRIMARY KEY,
name VARCHAR(200) NOT NULL,
description TEXT,
auditor_name VARCHAR(100) NOT NULL,
auditor_email VARCHAR(200),
auditor_organization VARCHAR(200),
status audit_session_status DEFAULT 'draft',
regulation_ids JSONB,
total_items INTEGER DEFAULT 0,
completed_items INTEGER DEFAULT 0,
compliant_count INTEGER DEFAULT 0,
non_compliant_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
started_at TIMESTAMP,
completed_at TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_audit_session_status ON compliance_audit_sessions(status);
CREATE INDEX IF NOT EXISTS ix_audit_session_auditor ON compliance_audit_sessions(auditor_name);
-- Table 12: compliance_audit_signoffs
-- Per-requirement sign-off within a session; unique per
-- (session, requirement). signature_hash VARCHAR(64) -- presumably a
-- SHA-256 over the sign-off payload; confirm with the signing code.
CREATE TABLE IF NOT EXISTS compliance_audit_signoffs (
id VARCHAR(36) PRIMARY KEY,
session_id VARCHAR(36) NOT NULL REFERENCES compliance_audit_sessions(id),
requirement_id VARCHAR(36) NOT NULL REFERENCES compliance_requirements(id),
result audit_result DEFAULT 'pending',
notes TEXT,
evidence_ids JSONB,
signature_hash VARCHAR(64),
signed_at TIMESTAMP,
signed_by VARCHAR(100),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE UNIQUE INDEX IF NOT EXISTS ix_signoff_session_requirement ON compliance_audit_signoffs(session_id, requirement_id);
CREATE INDEX IF NOT EXISTS ix_signoff_result ON compliance_audit_signoffs(result);
-- ============================================================================
-- ISO 27001 ISMS TABLES
-- ============================================================================
-- Table 13: compliance_isms_scope
-- Versioned ISMS scope statement (ISO 27001 clause 4.3): what is in and
-- out of scope, boundary descriptions, and the approval workflow state.
CREATE TABLE IF NOT EXISTS compliance_isms_scope (
id VARCHAR(36) PRIMARY KEY,
version VARCHAR(20) NOT NULL DEFAULT '1.0',
scope_statement TEXT NOT NULL,
included_locations JSONB,
included_processes JSONB,
included_services JSONB,
excluded_items JSONB,
exclusion_justification TEXT,
organizational_boundary TEXT,
physical_boundary TEXT,
technical_boundary TEXT,
status approval_status DEFAULT 'draft',
approved_by VARCHAR(100),
approved_at TIMESTAMP,
approval_signature VARCHAR(64),
effective_date DATE,
review_date DATE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(100),
updated_by VARCHAR(100)
);
CREATE INDEX IF NOT EXISTS ix_isms_scope_status ON compliance_isms_scope(status);
-- Table 14: compliance_isms_context
-- Versioned organizational context (ISO 27001 clause 4.1/4.2):
-- internal/external issues, interested parties and a SWOT breakdown,
-- each stored as JSONB lists.
CREATE TABLE IF NOT EXISTS compliance_isms_context (
id VARCHAR(36) PRIMARY KEY,
version VARCHAR(20) NOT NULL DEFAULT '1.0',
internal_issues JSONB,
external_issues JSONB,
interested_parties JSONB,
regulatory_requirements JSONB,
contractual_requirements JSONB,
swot_strengths JSONB,
swot_weaknesses JSONB,
swot_opportunities JSONB,
swot_threats JSONB,
status approval_status DEFAULT 'draft',
approved_by VARCHAR(100),
approved_at TIMESTAMP,
last_reviewed_at TIMESTAMP,
next_review_date DATE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Table 15: compliance_isms_policies
-- ISMS policy documents with versioning, an author/review/approval
-- chain, a review cadence, and an optional parent policy
-- (self-referencing FK) for policy hierarchies.
CREATE TABLE IF NOT EXISTS compliance_isms_policies (
id VARCHAR(36) PRIMARY KEY,
policy_id VARCHAR(30) UNIQUE NOT NULL,
title VARCHAR(200) NOT NULL,
policy_type VARCHAR(50) NOT NULL,
description TEXT,
policy_text TEXT NOT NULL,
applies_to JSONB,
version VARCHAR(20) NOT NULL DEFAULT '1.0',
status approval_status DEFAULT 'draft',
authored_by VARCHAR(100),
reviewed_by VARCHAR(100),
approved_by VARCHAR(100),
approved_at TIMESTAMP,
approval_signature VARCHAR(64),
effective_date DATE,
review_frequency_months INTEGER DEFAULT 12,
next_review_date DATE,
parent_policy_id VARCHAR(36) REFERENCES compliance_isms_policies(id),
related_controls JSONB,
document_path VARCHAR(500),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_policy_id ON compliance_isms_policies(policy_id);
CREATE INDEX IF NOT EXISTS ix_policy_type_status ON compliance_isms_policies(policy_type, status);
-- Table 16: compliance_security_objectives
-- Security objectives (ISO 27001 clause 6.2), structured along SMART
-- criteria with an attached KPI (name/target/current/unit) and
-- ownership plus progress tracking.
CREATE TABLE IF NOT EXISTS compliance_security_objectives (
id VARCHAR(36) PRIMARY KEY,
objective_id VARCHAR(30) UNIQUE NOT NULL,
title VARCHAR(200) NOT NULL,
description TEXT,
category VARCHAR(50),
specific TEXT,
measurable TEXT,
achievable TEXT,
relevant TEXT,
time_bound TEXT,
kpi_name VARCHAR(100),
kpi_target VARCHAR(100),
kpi_current VARCHAR(100),
kpi_unit VARCHAR(50),
measurement_frequency VARCHAR(50),
owner VARCHAR(100),
accountable VARCHAR(100),
status VARCHAR(30) DEFAULT 'active',
progress_percentage INTEGER DEFAULT 0,
target_date DATE,
achieved_date DATE,
related_controls JSONB,
related_risks JSONB,
approved_by VARCHAR(100),
approved_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_objective_id ON compliance_security_objectives(objective_id);
CREATE INDEX IF NOT EXISTS ix_objective_status ON compliance_security_objectives(status);
CREATE INDEX IF NOT EXISTS ix_objective_category ON compliance_security_objectives(category);
-- Table 17: compliance_soa (Statement of Applicability)
-- One row per ISO 27001 Annex A control: applicability decision with
-- mandatory justification, implementation state, and the mapping to
-- internal controls/evidence. Unique per annex_a_control.
CREATE TABLE IF NOT EXISTS compliance_soa (
id VARCHAR(36) PRIMARY KEY,
annex_a_control VARCHAR(20) NOT NULL,
annex_a_title VARCHAR(300) NOT NULL,
annex_a_category VARCHAR(100),
is_applicable BOOLEAN NOT NULL,
applicability_justification TEXT NOT NULL,
implementation_status VARCHAR(30) DEFAULT 'planned',
implementation_notes TEXT,
breakpilot_control_ids JSONB,
coverage_level VARCHAR(20) DEFAULT 'full',
evidence_description TEXT,
evidence_ids JSONB,
risk_assessment_notes TEXT,
compensating_controls TEXT,
reviewed_by VARCHAR(100),
reviewed_at TIMESTAMP,
approved_by VARCHAR(100),
approved_at TIMESTAMP,
version VARCHAR(20) DEFAULT '1.0',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE UNIQUE INDEX IF NOT EXISTS ix_soa_annex_control ON compliance_soa(annex_a_control);
CREATE INDEX IF NOT EXISTS ix_soa_applicable ON compliance_soa(is_applicable);
CREATE INDEX IF NOT EXISTS ix_soa_status ON compliance_soa(implementation_status);
-- Table 18: compliance_internal_audits (MUST be before audit_findings due to FK)
-- Internal audit programme (ISO 27001 clause 9.2): planning, team,
-- denormalized finding counters, conclusion and report approval.
-- follow_up_audit_id intentionally has no FK constraint (self-reference
-- to an audit that may not exist yet when scheduled).
CREATE TABLE IF NOT EXISTS compliance_internal_audits (
id VARCHAR(36) PRIMARY KEY,
audit_id VARCHAR(30) UNIQUE NOT NULL,
title VARCHAR(200) NOT NULL,
audit_type VARCHAR(50) NOT NULL,
scope_description TEXT NOT NULL,
iso_chapters_covered JSONB,
annex_a_controls_covered JSONB,
processes_covered JSONB,
departments_covered JSONB,
criteria TEXT,
planned_date DATE NOT NULL,
actual_start_date DATE,
actual_end_date DATE,
lead_auditor VARCHAR(100) NOT NULL,
audit_team JSONB,
auditee_representatives JSONB,
status VARCHAR(30) DEFAULT 'planned',
total_findings INTEGER DEFAULT 0,
major_findings INTEGER DEFAULT 0,
minor_findings INTEGER DEFAULT 0,
ofi_count INTEGER DEFAULT 0,
positive_observations INTEGER DEFAULT 0,
audit_conclusion TEXT,
overall_assessment VARCHAR(30),
report_date DATE,
report_document_path VARCHAR(500),
report_approved_by VARCHAR(100),
report_approved_at TIMESTAMP,
follow_up_audit_required BOOLEAN DEFAULT FALSE,
follow_up_audit_id VARCHAR(36),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_internal_audit_id ON compliance_internal_audits(audit_id);
CREATE INDEX IF NOT EXISTS ix_internal_audit_date ON compliance_internal_audits(planned_date);
CREATE INDEX IF NOT EXISTS ix_internal_audit_status ON compliance_internal_audits(status);
-- Table 19: compliance_audit_findings
-- Findings raised in either an external audit session or an internal
-- audit (both FKs nullable -- exactly one is expected to be set;
-- TODO confirm the application enforces that). Tracks root cause,
-- remediation status and independent verification before closure.
CREATE TABLE IF NOT EXISTS compliance_audit_findings (
id VARCHAR(36) PRIMARY KEY,
finding_id VARCHAR(30) UNIQUE NOT NULL,
audit_session_id VARCHAR(36) REFERENCES compliance_audit_sessions(id),
internal_audit_id VARCHAR(36) REFERENCES compliance_internal_audits(id),
finding_type finding_type NOT NULL,
iso_chapter VARCHAR(20),
annex_a_control VARCHAR(20),
title VARCHAR(300) NOT NULL,
description TEXT NOT NULL,
objective_evidence TEXT NOT NULL,
root_cause TEXT,
root_cause_method VARCHAR(50),
impact_description TEXT,
affected_processes JSONB,
affected_assets JSONB,
status finding_status DEFAULT 'open',
owner VARCHAR(100),
auditor VARCHAR(100),
identified_date DATE NOT NULL DEFAULT CURRENT_DATE,
due_date DATE,
closed_date DATE,
verification_method TEXT,
verified_by VARCHAR(100),
verified_at TIMESTAMP,
verification_evidence TEXT,
closure_notes TEXT,
closed_by VARCHAR(100),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_finding_id ON compliance_audit_findings(finding_id);
CREATE INDEX IF NOT EXISTS ix_finding_type_status ON compliance_audit_findings(finding_type, status);
CREATE INDEX IF NOT EXISTS ix_finding_due_date ON compliance_audit_findings(due_date);
-- Table 20: compliance_corrective_actions
-- CAPA records attached to a finding: plan, ownership, schedule,
-- effort tracking, implementation evidence and an explicit
-- effectiveness-verification step (ISO 27001 clause 10.1).
CREATE TABLE IF NOT EXISTS compliance_corrective_actions (
id VARCHAR(36) PRIMARY KEY,
capa_id VARCHAR(30) UNIQUE NOT NULL,
finding_id VARCHAR(36) NOT NULL REFERENCES compliance_audit_findings(id),
capa_type capa_type NOT NULL,
title VARCHAR(300) NOT NULL,
description TEXT NOT NULL,
expected_outcome TEXT,
assigned_to VARCHAR(100) NOT NULL,
approved_by VARCHAR(100),
planned_start DATE,
planned_completion DATE NOT NULL,
actual_completion DATE,
status VARCHAR(30) DEFAULT 'planned',
progress_percentage INTEGER DEFAULT 0,
estimated_effort_hours INTEGER,
actual_effort_hours INTEGER,
resources_required TEXT,
implementation_evidence TEXT,
evidence_ids JSONB,
effectiveness_criteria TEXT,
effectiveness_verified BOOLEAN DEFAULT FALSE,
effectiveness_verification_date DATE,
effectiveness_notes TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_capa_id ON compliance_corrective_actions(capa_id);
CREATE INDEX IF NOT EXISTS ix_capa_status ON compliance_corrective_actions(status);
CREATE INDEX IF NOT EXISTS ix_capa_due ON compliance_corrective_actions(planned_completion);
-- Table 21: compliance_management_reviews
-- Management review meetings (ISO 27001 clause 9.3). The input_* and
-- output_* columns mirror the clause's required review inputs/outputs;
-- action_items and attendees are JSONB lists.
CREATE TABLE IF NOT EXISTS compliance_management_reviews (
id VARCHAR(36) PRIMARY KEY,
review_id VARCHAR(30) UNIQUE NOT NULL,
title VARCHAR(200) NOT NULL,
review_date DATE NOT NULL,
review_period_start DATE,
review_period_end DATE,
chairperson VARCHAR(100) NOT NULL,
attendees JSONB,
input_previous_actions TEXT,
input_isms_changes TEXT,
input_security_performance TEXT,
input_interested_party_feedback TEXT,
input_risk_assessment_results TEXT,
input_improvement_opportunities TEXT,
input_policy_effectiveness TEXT,
input_objective_achievement TEXT,
input_resource_adequacy TEXT,
output_improvement_decisions TEXT,
output_isms_changes TEXT,
output_resource_needs TEXT,
action_items JSONB,
isms_effectiveness_rating VARCHAR(20),
key_decisions TEXT,
status VARCHAR(30) DEFAULT 'draft',
approved_by VARCHAR(100),
approved_at TIMESTAMP,
minutes_document_path VARCHAR(500),
next_review_date DATE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_mgmt_review_id ON compliance_management_reviews(review_id);
CREATE INDEX IF NOT EXISTS ix_mgmt_review_date ON compliance_management_reviews(review_date);
CREATE INDEX IF NOT EXISTS ix_mgmt_review_status ON compliance_management_reviews(status);
-- Table 22: compliance_audit_trail
-- Append-only change log for compliance entities: who changed what
-- field, old/new value, plus request context. ip_address VARCHAR(45)
-- fits a full IPv6 textual address; checksum VARCHAR(64) -- presumably
-- a SHA-256 tamper-evidence hash, confirm with the writer code.
CREATE TABLE IF NOT EXISTS compliance_audit_trail (
id VARCHAR(36) PRIMARY KEY,
entity_type VARCHAR(50) NOT NULL,
entity_id VARCHAR(36) NOT NULL,
entity_name VARCHAR(200),
action VARCHAR(20) NOT NULL,
field_changed VARCHAR(100),
old_value TEXT,
new_value TEXT,
change_summary TEXT,
performed_by VARCHAR(100) NOT NULL,
performed_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
ip_address VARCHAR(45),
user_agent VARCHAR(500),
session_id VARCHAR(100),
checksum VARCHAR(64),
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_audit_trail_entity ON compliance_audit_trail(entity_type, entity_id);
CREATE INDEX IF NOT EXISTS ix_audit_trail_time ON compliance_audit_trail(performed_at);
CREATE INDEX IF NOT EXISTS ix_audit_trail_user ON compliance_audit_trail(performed_by);
-- Table 23: compliance_isms_readiness
-- Snapshot of automated certification-readiness checks: per-clause
-- (ISO 27001 chapters 4-10) status, potential findings, and component
-- scores. Append-only (no updated_at column, one row per check run).
CREATE TABLE IF NOT EXISTS compliance_isms_readiness (
id VARCHAR(36) PRIMARY KEY,
check_date TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
triggered_by VARCHAR(100),
overall_status VARCHAR(20) NOT NULL,
certification_possible BOOLEAN NOT NULL,
chapter_4_status VARCHAR(20),
chapter_5_status VARCHAR(20),
chapter_6_status VARCHAR(20),
chapter_7_status VARCHAR(20),
chapter_8_status VARCHAR(20),
chapter_9_status VARCHAR(20),
chapter_10_status VARCHAR(20),
potential_majors JSONB,
potential_minors JSONB,
improvement_opportunities JSONB,
readiness_score FLOAT,
documentation_score FLOAT,
implementation_score FLOAT,
evidence_score FLOAT,
priority_actions JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS ix_readiness_date ON compliance_isms_readiness(check_date);
CREATE INDEX IF NOT EXISTS ix_readiness_status ON compliance_isms_readiness(overall_status);
-- ============================================================================
-- UPDATE TIMESTAMPS TRIGGER
-- ============================================================================
-- Generic BEFORE UPDATE row trigger: stamp updated_at with the current
-- transaction timestamp on every modified row.
CREATE OR REPLACE FUNCTION update_compliance_timestamp()
    RETURNS TRIGGER
    LANGUAGE plpgsql
AS $fn$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$fn$;
-- Apply trigger to all compliance tables with updated_at
--
-- BUGFIX: the previous version interpolated the table name into the
-- middle of the trigger identifier via format('trigger_%I_updated_at', t).
-- %I quotes identifiers when needed, so any table name requiring quoting
-- would have produced invalid DDL like trigger_"My Table"_updated_at.
-- Build the complete trigger name first and quote it as a whole.
-- Generated trigger names are unchanged for all existing (lower-case)
-- compliance_* tables.
DO $$
DECLARE
    t TEXT;    -- table name being processed
    trg TEXT;  -- full trigger name: trigger_<table>_updated_at
BEGIN
    -- Every public compliance_* table that has an updated_at column.
    FOR t IN
        SELECT table_name
        FROM information_schema.columns
        WHERE table_schema = 'public'
          AND column_name = 'updated_at'
          AND table_name LIKE 'compliance_%'
    LOOP
        trg := 'trigger_' || t || '_updated_at';
        -- Idempotent: drop any previous definition, then recreate.
        EXECUTE format('DROP TRIGGER IF EXISTS %I ON %I', trg, t);
        EXECUTE format(
            'CREATE TRIGGER %I
             BEFORE UPDATE ON %I
             FOR EACH ROW EXECUTE FUNCTION update_compliance_timestamp()',
            trg, t);
    END LOOP;
END $$;
-- ============================================================================
-- CLEANUP FUNCTIONS
-- ============================================================================
-- Function to cleanup expired evidence
--
-- Marks compliance_evidence rows whose validity window has lapsed
-- (valid_until in the past) as status = 'expired'. Rows are flagged,
-- never deleted, so the historical evidence trail is preserved.
-- Returns: the number of rows transitioned from 'valid' to 'expired'.
-- (Renamed the local from the misleading `deleted_count` -- nothing is
-- deleted here; behavior is unchanged.)
CREATE OR REPLACE FUNCTION cleanup_expired_compliance_evidence()
RETURNS INTEGER AS $$
DECLARE
    expired_count INTEGER;
BEGIN
    UPDATE compliance_evidence
    SET status = 'expired'
    WHERE valid_until < CURRENT_TIMESTAMP
      AND status = 'valid';
    -- ROW_COUNT reflects the UPDATE immediately above.
    GET DIAGNOSTICS expired_count = ROW_COUNT;
    RETURN expired_count;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- GRANT PERMISSIONS
-- ============================================================================
-- Grant permissions to the application user (adjust username as needed)
-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO breakpilot;
-- GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO breakpilot;
-- ============================================================================
-- MIGRATION COMPLETE
-- ============================================================================
-- Emit a one-line status: how many compliance_* tables now exist in the
-- public schema (serves as a quick post-migration sanity check).
SELECT format(
    'Compliance migration completed. %s tables created.',
    (SELECT COUNT(*)
     FROM information_schema.tables
     WHERE table_schema = 'public'
       AND table_name LIKE 'compliance_%')
) AS status;

View File

@@ -0,0 +1,241 @@
-- ==============================================
-- Breakpilot Drive - Game Tables Migration
-- ==============================================
-- Run this migration to add game-related tables to PostgreSQL.
--
-- Execute with:
--   psql -h localhost -U breakpilot -d breakpilot -f add_game_tables.sql
-- Enable UUID extension if not already enabled (provides uuid_generate_v4()).
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ==============================================
-- Student Learning State
-- ==============================================
-- Tracks the learning progress of each student across subjects.
-- This is the core table for adaptive difficulty.
-- Exactly one row per student (enforced by unique_student_learning).
CREATE TABLE IF NOT EXISTS student_learning_state (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
student_id UUID NOT NULL,
overall_level INTEGER DEFAULT 3 CHECK (overall_level >= 1 AND overall_level <= 5),
math_level DECIMAL(3,2) DEFAULT 3.0 CHECK (math_level >= 1.0 AND math_level <= 5.0),
german_level DECIMAL(3,2) DEFAULT 3.0 CHECK (german_level >= 1.0 AND german_level <= 5.0),
english_level DECIMAL(3,2) DEFAULT 3.0 CHECK (english_level >= 1.0 AND english_level <= 5.0),
total_play_time_minutes INTEGER DEFAULT 0,
total_sessions INTEGER DEFAULT 0,
questions_answered INTEGER DEFAULT 0,
questions_correct INTEGER DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
CONSTRAINT unique_student_learning UNIQUE(student_id)
);
-- Index for fast lookups by student_id
CREATE INDEX IF NOT EXISTS idx_learning_state_student ON student_learning_state(student_id);
-- Comment on table
COMMENT ON TABLE student_learning_state IS 'Tracks learning progress for Breakpilot Drive game';
COMMENT ON COLUMN student_learning_state.overall_level IS 'Overall difficulty level 1-5 (1=Beginner/Grade 2-3, 5=Expert/Grade 6+)';
COMMENT ON COLUMN student_learning_state.math_level IS 'Math subject proficiency level';
COMMENT ON COLUMN student_learning_state.german_level IS 'German subject proficiency level';
COMMENT ON COLUMN student_learning_state.english_level IS 'English subject proficiency level';
-- ==============================================
-- Game Sessions
-- ==============================================
-- Records each game session played by a student.
-- NOTE(review): student_id carries no FK here -- presumably the students
-- table lives in another migration; confirm referential integrity elsewhere.
CREATE TABLE IF NOT EXISTS game_sessions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
student_id UUID NOT NULL,
game_mode VARCHAR(20) NOT NULL CHECK (game_mode IN ('video', 'audio')),
duration_seconds INTEGER NOT NULL CHECK (duration_seconds >= 0),
distance_traveled DECIMAL(10,2),
score INTEGER NOT NULL DEFAULT 0,
questions_answered INTEGER DEFAULT 0,
questions_correct INTEGER DEFAULT 0,
difficulty_level INTEGER NOT NULL CHECK (difficulty_level >= 1 AND difficulty_level <= 5),
started_at TIMESTAMPTZ NOT NULL,
ended_at TIMESTAMPTZ DEFAULT NOW(),
metadata JSONB,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_game_sessions_student ON game_sessions(student_id);
CREATE INDEX IF NOT EXISTS idx_game_sessions_date ON game_sessions(ended_at);
CREATE INDEX IF NOT EXISTS idx_game_sessions_score ON game_sessions(score DESC);
-- Comment on table
COMMENT ON TABLE game_sessions IS 'Records individual game sessions for Breakpilot Drive';
COMMENT ON COLUMN game_sessions.game_mode IS 'Game mode: video (visual) or audio (voice-guided)';
COMMENT ON COLUMN game_sessions.distance_traveled IS 'Distance traveled in game units';
COMMENT ON COLUMN game_sessions.metadata IS 'Additional session data in JSON format';
-- ==============================================
-- Game Quiz Answers
-- ==============================================
-- Tracks individual quiz answers for detailed analytics.
-- Rows cascade-delete with their parent session.
CREATE TABLE IF NOT EXISTS game_quiz_answers (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
session_id UUID REFERENCES game_sessions(id) ON DELETE CASCADE,
question_id VARCHAR(100) NOT NULL,
subject VARCHAR(50) NOT NULL CHECK (subject IN ('math', 'german', 'english', 'general')),
difficulty INTEGER NOT NULL CHECK (difficulty >= 1 AND difficulty <= 5),
is_correct BOOLEAN NOT NULL,
answer_time_ms INTEGER CHECK (answer_time_ms >= 0),
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for analytics queries
CREATE INDEX IF NOT EXISTS idx_quiz_answers_session ON game_quiz_answers(session_id);
CREATE INDEX IF NOT EXISTS idx_quiz_answers_subject ON game_quiz_answers(subject);
CREATE INDEX IF NOT EXISTS idx_quiz_answers_correct ON game_quiz_answers(is_correct);
-- Comment on table
COMMENT ON TABLE game_quiz_answers IS 'Individual quiz answer records for learning analytics';
-- ==============================================
-- Trigger: Update updated_at timestamp
-- ==============================================
-- Trigger function: refreshes updated_at on every row update.
-- Fix: the language name was single-quoted (language 'plpgsql'), a form
-- deprecated in PostgreSQL; the unquoted identifier is the supported syntax.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Apply trigger to student_learning_state
DROP TRIGGER IF EXISTS update_student_learning_state_updated_at ON student_learning_state;
CREATE TRIGGER update_student_learning_state_updated_at
BEFORE UPDATE ON student_learning_state
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- ==============================================
-- View: Student Summary Statistics
-- ==============================================
-- Per-student learning state joined with activity from the last 7 days.
CREATE OR REPLACE VIEW game_student_summary AS
SELECT
sls.student_id,
sls.overall_level,
sls.math_level,
sls.german_level,
sls.english_level,
sls.total_play_time_minutes,
sls.total_sessions,
sls.questions_answered,
sls.questions_correct,
-- Lifetime accuracy as a percentage; 0 until the first question is answered.
CASE WHEN sls.questions_answered > 0
THEN ROUND((sls.questions_correct::DECIMAL / sls.questions_answered) * 100, 1)
ELSE 0 END as accuracy_percent,
COALESCE(recent.recent_score, 0) as recent_score,
COALESCE(recent.recent_sessions, 0) as sessions_last_7_days
FROM student_learning_state sls
LEFT JOIN (
SELECT
student_id,
SUM(score) as recent_score,
COUNT(*) as recent_sessions
FROM game_sessions
WHERE ended_at > NOW() - INTERVAL '7 days'
GROUP BY student_id
) recent ON sls.student_id = recent.student_id;
COMMENT ON VIEW game_student_summary IS 'Summary statistics for each student including recent activity';
-- ==============================================
-- View: Daily Leaderboard
-- ==============================================
-- NOTE(review): "daily" is a rolling 24-hour window (NOW() - 1 day), not a
-- calendar day -- confirm this matches product expectations.
CREATE OR REPLACE VIEW game_daily_leaderboard AS
SELECT
student_id,
SUM(score) as total_score,
COUNT(*) as session_count,
SUM(questions_correct) as total_correct,
SUM(questions_answered) as total_questions,
RANK() OVER (ORDER BY SUM(score) DESC) as rank
FROM game_sessions
WHERE ended_at > NOW() - INTERVAL '1 day'
GROUP BY student_id
ORDER BY total_score DESC;
COMMENT ON VIEW game_daily_leaderboard IS 'Daily leaderboard for Breakpilot Drive';
-- ==============================================
-- Function: Calculate Level Adjustment
-- ==============================================
-- Looks at the student's 10 most recent quiz answers and recommends a
-- difficulty change: -1 (decrease), 0 (keep), or 1 (increase).
CREATE OR REPLACE FUNCTION calculate_level_adjustment(p_student_id UUID)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
    v_accuracy DECIMAL;
    v_sample_size INTEGER;
BEGIN
    -- Accuracy over the most recent answers across all of the student's sessions.
    SELECT
        CASE WHEN COUNT(*) > 0
             THEN SUM(CASE WHEN is_correct THEN 1 ELSE 0 END)::DECIMAL / COUNT(*)
             ELSE 0 END,
        COUNT(*)
    INTO v_accuracy, v_sample_size
    FROM (
        SELECT qa.is_correct
        FROM game_quiz_answers qa
        JOIN game_sessions gs ON gs.id = qa.session_id
        WHERE gs.student_id = p_student_id
        ORDER BY qa.created_at DESC
        LIMIT 10
    ) latest;

    IF v_sample_size < 5 THEN
        RETURN 0;       -- too few answers to make a call
    ELSIF v_accuracy >= 0.8 THEN
        RETURN 1;       -- high accuracy (>=80%) -> raise the level
    ELSIF v_accuracy < 0.4 THEN
        RETURN -1;      -- low accuracy (<40%) -> lower the level
    ELSE
        RETURN 0;       -- keep current level
    END IF;
END;
$$;
COMMENT ON FUNCTION calculate_level_adjustment IS 'Calculates recommended difficulty adjustment based on recent performance';
-- ==============================================
-- Grant Permissions (adjust user as needed)
-- ==============================================
-- Left commented out on purpose: grants are environment-specific.
-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO breakpilot;
-- GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO breakpilot;
-- GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO breakpilot;
-- ==============================================
-- Migration Complete Message
-- ==============================================
-- Emits informational notices so psql output confirms what was created.
DO $$
BEGIN
RAISE NOTICE 'Breakpilot Drive game tables created successfully!';
RAISE NOTICE 'Tables: student_learning_state, game_sessions, game_quiz_answers';
RAISE NOTICE 'Views: game_student_summary, game_daily_leaderboard';
END $$;

View File

@@ -0,0 +1,409 @@
-- ==============================================
-- Jitsi Recordings & Transcription Tables Migration
-- ==============================================
-- Run this migration to add recording and transcription tables.
--
-- Execute with:
--   psql -h localhost -U breakpilot -d breakpilot_db -f add_recording_transcription_tables.sql
-- Enable UUID extension if not already enabled (provides uuid_generate_v4()).
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ==============================================
-- Meeting Recording Consents (DSGVO)
-- ==============================================
-- Tracks consent for meeting recordings.
-- All participants must consent before recording starts.
CREATE TABLE IF NOT EXISTS meeting_recording_consents (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
meeting_id VARCHAR(255) NOT NULL,
user_id UUID,
consent_type VARCHAR(50) NOT NULL CHECK (consent_type IN ('opt_in', 'announced', 'implicit')),
all_participants_consented BOOLEAN DEFAULT FALSE,
participant_count INTEGER DEFAULT 0,
consented_count INTEGER DEFAULT 0,
consented_at TIMESTAMPTZ,
withdrawn_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_recording_consents_meeting ON meeting_recording_consents(meeting_id);
CREATE INDEX IF NOT EXISTS idx_recording_consents_user ON meeting_recording_consents(user_id);
-- Comments
COMMENT ON TABLE meeting_recording_consents IS 'DSGVO-compliant consent tracking for meeting recordings';
COMMENT ON COLUMN meeting_recording_consents.consent_type IS 'Type: opt_in (explicit), announced (verbally announced), implicit (policy-based)';
COMMENT ON COLUMN meeting_recording_consents.withdrawn_at IS 'Set when consent is withdrawn (soft delete)';
-- ==============================================
-- Recordings
-- ==============================================
-- Stores metadata for recorded meetings.
-- The media itself lives in MinIO; storage_path/audio_path point into it.
CREATE TABLE IF NOT EXISTS recordings (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
meeting_id VARCHAR(255) NOT NULL,
jibri_session_id VARCHAR(255),
title VARCHAR(500),
storage_path VARCHAR(1000) NOT NULL,
audio_path VARCHAR(1000),
file_size_bytes BIGINT,
duration_seconds INTEGER,
participant_count INTEGER DEFAULT 0,
status VARCHAR(50) NOT NULL DEFAULT 'uploaded' CHECK (status IN ('uploaded', 'processing', 'ready', 'failed', 'deleted')),
created_by UUID,
recorded_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
retention_days INTEGER DEFAULT 365,
deleted_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_recordings_meeting ON recordings(meeting_id);
CREATE INDEX IF NOT EXISTS idx_recordings_status ON recordings(status);
CREATE INDEX IF NOT EXISTS idx_recordings_created_by ON recordings(created_by);
CREATE INDEX IF NOT EXISTS idx_recordings_recorded_at ON recordings(recorded_at);
-- Comments
COMMENT ON TABLE recordings IS 'Jitsi meeting recordings stored in MinIO';
COMMENT ON COLUMN recordings.storage_path IS 'Path in MinIO bucket: recordings/{recording_name}/video.mp4';
COMMENT ON COLUMN recordings.audio_path IS 'Extracted audio for transcription: recordings/{recording_name}/audio.wav';
COMMENT ON COLUMN recordings.retention_days IS 'Days until automatic deletion (DSGVO compliance)';
COMMENT ON COLUMN recordings.deleted_at IS 'Soft delete timestamp for DSGVO audit trail';
-- ==============================================
-- Transcriptions
-- ==============================================
-- Stores transcription metadata and full text.
-- One row per transcription attempt; cascade-deletes with the recording.
CREATE TABLE IF NOT EXISTS transcriptions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
recording_id UUID NOT NULL REFERENCES recordings(id) ON DELETE CASCADE,
language VARCHAR(10) NOT NULL DEFAULT 'de',
model VARCHAR(100) NOT NULL DEFAULT 'large-v3',
status VARCHAR(50) NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'queued', 'processing', 'completed', 'failed')),
full_text TEXT,
word_count INTEGER DEFAULT 0,
confidence_score FLOAT,
vtt_path VARCHAR(1000),
srt_path VARCHAR(1000),
json_path VARCHAR(1000),
error_message TEXT,
processing_started_at TIMESTAMPTZ,
processing_completed_at TIMESTAMPTZ,
processing_duration_seconds INTEGER,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_transcriptions_recording ON transcriptions(recording_id);
CREATE INDEX IF NOT EXISTS idx_transcriptions_status ON transcriptions(status);
CREATE INDEX IF NOT EXISTS idx_transcriptions_language ON transcriptions(language);
-- Full-text search index for transcription content
-- NOTE(review): the tsvector config is fixed to 'german' even though the
-- language column defaults to 'de' but can hold other codes -- confirm
-- non-German transcripts are acceptable collateral here.
CREATE INDEX IF NOT EXISTS idx_transcriptions_fulltext ON transcriptions USING gin(to_tsvector('german', COALESCE(full_text, '')));
-- Comments
COMMENT ON TABLE transcriptions IS 'Whisper transcriptions with speaker diarization';
COMMENT ON COLUMN transcriptions.model IS 'Whisper model used: tiny, base, small, medium, large-v3';
COMMENT ON COLUMN transcriptions.vtt_path IS 'WebVTT subtitle file path in MinIO';
COMMENT ON COLUMN transcriptions.srt_path IS 'SRT subtitle file path in MinIO';
COMMENT ON COLUMN transcriptions.json_path IS 'Full JSON with segments and speakers in MinIO';
-- ==============================================
-- Transcription Segments
-- ==============================================
-- Individual speech segments with speaker identification.
-- Timestamps are milliseconds from the start of the recording.
CREATE TABLE IF NOT EXISTS transcription_segments (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
transcription_id UUID NOT NULL REFERENCES transcriptions(id) ON DELETE CASCADE,
segment_index INTEGER NOT NULL,
start_time_ms INTEGER NOT NULL,
end_time_ms INTEGER NOT NULL,
text TEXT NOT NULL,
speaker_id VARCHAR(50),
speaker_name VARCHAR(255),
confidence FLOAT,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_segments_transcription ON transcription_segments(transcription_id);
CREATE INDEX IF NOT EXISTS idx_segments_speaker ON transcription_segments(speaker_id);
CREATE INDEX IF NOT EXISTS idx_segments_time ON transcription_segments(start_time_ms, end_time_ms);
-- Full-text search on segments
CREATE INDEX IF NOT EXISTS idx_segments_fulltext ON transcription_segments USING gin(to_tsvector('german', text));
-- Comments
COMMENT ON TABLE transcription_segments IS 'Individual speech segments with timestamps and speaker IDs';
COMMENT ON COLUMN transcription_segments.speaker_id IS 'pyannote speaker ID: SPEAKER_00, SPEAKER_01, etc.';
COMMENT ON COLUMN transcription_segments.speaker_name IS 'Optionally mapped to actual participant name';
-- ==============================================
-- Recording Audit Log (DSGVO)
-- ==============================================
-- Tracks all access and modifications for compliance.
-- recording_id/transcription_id are intentionally without FKs so audit rows
-- survive deletion of the referenced objects.
CREATE TABLE IF NOT EXISTS recording_audit_log (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
recording_id UUID,
transcription_id UUID,
user_id UUID,
action VARCHAR(100) NOT NULL CHECK (action IN (
'created', 'viewed', 'downloaded', 'shared',
'transcription_started', 'transcription_completed',
'deleted', 'retention_expired', 'consent_withdrawn'
)),
ip_address INET,
user_agent TEXT,
metadata JSONB,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_audit_recording ON recording_audit_log(recording_id);
CREATE INDEX IF NOT EXISTS idx_audit_transcription ON recording_audit_log(transcription_id);
CREATE INDEX IF NOT EXISTS idx_audit_user ON recording_audit_log(user_id);
CREATE INDEX IF NOT EXISTS idx_audit_action ON recording_audit_log(action);
CREATE INDEX IF NOT EXISTS idx_audit_created ON recording_audit_log(created_at);
-- Comments
COMMENT ON TABLE recording_audit_log IS 'DSGVO audit trail for all recording access';
COMMENT ON COLUMN recording_audit_log.action IS 'Type of action performed';
COMMENT ON COLUMN recording_audit_log.metadata IS 'Additional context (e.g., reason for deletion)';
-- ==============================================
-- Transcription Queue (RQ Job Tracking)
-- ==============================================
-- Tracks pending and completed transcription jobs.
-- Mirrors RQ state in the database for visibility and retry accounting.
CREATE TABLE IF NOT EXISTS transcription_queue (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
transcription_id UUID NOT NULL REFERENCES transcriptions(id) ON DELETE CASCADE,
job_id VARCHAR(255),
priority INTEGER DEFAULT 0,
status VARCHAR(50) NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'queued', 'processing', 'completed', 'failed', 'cancelled')),
worker_id VARCHAR(255),
attempts INTEGER DEFAULT 0,
max_attempts INTEGER DEFAULT 3,
error_message TEXT,
queued_at TIMESTAMPTZ,
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_queue_status ON transcription_queue(status);
CREATE INDEX IF NOT EXISTS idx_queue_priority ON transcription_queue(priority DESC, created_at ASC);
CREATE INDEX IF NOT EXISTS idx_queue_job ON transcription_queue(job_id);
-- Comments
COMMENT ON TABLE transcription_queue IS 'RQ job queue tracking for transcription workers';
-- ==============================================
-- Trigger: Update updated_at timestamp
-- ==============================================
-- Shared trigger function for all recording-related tables.
-- Fix: the language name was single-quoted (LANGUAGE 'plpgsql'), a form
-- deprecated in PostgreSQL; the unquoted identifier is the supported syntax.
CREATE OR REPLACE FUNCTION update_recording_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Apply triggers
DROP TRIGGER IF EXISTS update_recordings_updated_at ON recordings;
CREATE TRIGGER update_recordings_updated_at
BEFORE UPDATE ON recordings
FOR EACH ROW
EXECUTE FUNCTION update_recording_updated_at();
DROP TRIGGER IF EXISTS update_transcriptions_updated_at ON transcriptions;
CREATE TRIGGER update_transcriptions_updated_at
BEFORE UPDATE ON transcriptions
FOR EACH ROW
EXECUTE FUNCTION update_recording_updated_at();
DROP TRIGGER IF EXISTS update_consents_updated_at ON meeting_recording_consents;
CREATE TRIGGER update_consents_updated_at
BEFORE UPDATE ON meeting_recording_consents
FOR EACH ROW
EXECUTE FUNCTION update_recording_updated_at();
DROP TRIGGER IF EXISTS update_queue_updated_at ON transcription_queue;
CREATE TRIGGER update_queue_updated_at
BEFORE UPDATE ON transcription_queue
FOR EACH ROW
EXECUTE FUNCTION update_recording_updated_at();
-- ==============================================
-- View: Recording Overview
-- ==============================================
-- NOTE(review): the consent join is on meeting_id, so multiple consent rows
-- for one meeting (or multiple transcriptions per recording) fan out into
-- duplicate recording rows -- confirm consumers expect that.
CREATE OR REPLACE VIEW recording_overview AS
SELECT
r.id,
r.meeting_id,
r.title,
r.status as recording_status,
r.duration_seconds,
r.participant_count,
r.recorded_at,
r.retention_days,
-- Retention deadline derived from recording time + per-row retention window.
r.recorded_at + (r.retention_days || ' days')::INTERVAL as retention_expires_at,
t.id as transcription_id,
t.status as transcription_status,
t.language,
t.word_count,
t.confidence_score,
c.all_participants_consented,
c.consent_type
FROM recordings r
LEFT JOIN transcriptions t ON t.recording_id = r.id
LEFT JOIN meeting_recording_consents c ON c.meeting_id = r.meeting_id
WHERE r.deleted_at IS NULL;
COMMENT ON VIEW recording_overview IS 'Combined view of recordings with transcription and consent status';
-- ==============================================
-- View: Pending Transcriptions
-- ==============================================
-- Work list for transcription workers, highest priority first.
-- NOTE(review): only recordings still in status 'uploaded' qualify -- verify
-- that a recording is not moved to 'processing' before its transcript is queued.
CREATE OR REPLACE VIEW pending_transcriptions AS
SELECT
t.id,
t.recording_id,
r.storage_path,
r.audio_path,
t.language,
t.model,
q.priority,
q.attempts,
q.max_attempts,
q.created_at as queued_at
FROM transcriptions t
JOIN recordings r ON r.id = t.recording_id
LEFT JOIN transcription_queue q ON q.transcription_id = t.id
WHERE t.status IN ('pending', 'queued')
AND r.status = 'uploaded'
AND r.deleted_at IS NULL
ORDER BY q.priority DESC, q.created_at ASC;
COMMENT ON VIEW pending_transcriptions IS 'Queue of transcriptions waiting to be processed';
-- ==============================================
-- Function: Search Transcripts
-- ==============================================
-- Full-text search over completed transcription segments.
-- Returns the matching segments ranked by ts_rank relevance, best first.
-- NOTE(review): the text-search config is hard-coded to 'german' while
-- p_language only filters rows; a non-'de' p_language still stems queries
-- with German rules. Changing the config would also bypass the 'german'
-- GIN index -- confirm intended behavior before touching this.
CREATE OR REPLACE FUNCTION search_transcripts(
p_query TEXT,
p_language VARCHAR(10) DEFAULT 'de',
p_limit INTEGER DEFAULT 20
)
RETURNS TABLE (
transcription_id UUID,
recording_id UUID,
meeting_id VARCHAR(255),
title VARCHAR(500),
segment_id UUID,
segment_text TEXT,
start_time_ms INTEGER,
end_time_ms INTEGER,
speaker_id VARCHAR(50),
relevance FLOAT
) AS $$
BEGIN
RETURN QUERY
SELECT
t.id as transcription_id,
r.id as recording_id,
r.meeting_id,
r.title,
s.id as segment_id,
s.text as segment_text,
s.start_time_ms,
s.end_time_ms,
s.speaker_id,
ts_rank(to_tsvector('german', s.text), plainto_tsquery('german', p_query))::FLOAT as relevance
FROM transcription_segments s
JOIN transcriptions t ON t.id = s.transcription_id
JOIN recordings r ON r.id = t.recording_id
WHERE t.language = p_language
AND t.status = 'completed'
AND r.deleted_at IS NULL
-- Soft-deleted recordings are excluded; only completed transcripts searchable.
AND to_tsvector('german', s.text) @@ plainto_tsquery('german', p_query)
ORDER BY relevance DESC
LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION search_transcripts IS 'Full-text search across all transcription segments';
-- ==============================================
-- Function: Cleanup Expired Recordings
-- ==============================================
-- Soft-deletes recordings whose retention window has elapsed; intended to be
-- invoked periodically. Returns the number of recordings expired this run.
CREATE OR REPLACE FUNCTION cleanup_expired_recordings()
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
    v_expired INTEGER;
BEGIN
    UPDATE recordings
       SET status = 'deleted',
           deleted_at = NOW()
     WHERE deleted_at IS NULL
       AND status != 'deleted'
       AND recorded_at + (retention_days || ' days')::INTERVAL < NOW();
    GET DIAGNOSTICS v_expired = ROW_COUNT;

    -- Record the sweep in the audit trail, but only when something expired.
    IF v_expired > 0 THEN
        INSERT INTO recording_audit_log (action, metadata)
        VALUES ('retention_expired', jsonb_build_object('count', v_expired, 'timestamp', NOW()));
    END IF;
    RETURN v_expired;
END;
$$;
COMMENT ON FUNCTION cleanup_expired_recordings IS 'Soft-deletes recordings past their retention period';
-- ==============================================
-- Grant Permissions
-- ==============================================
-- Left commented out on purpose: grants are environment-specific.
-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO breakpilot;
-- GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO breakpilot;
-- GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO breakpilot;
-- ==============================================
-- Migration Complete Message
-- ==============================================
-- Emits informational notices so psql output confirms what was created.
DO $$
BEGIN
RAISE NOTICE 'Recording & Transcription tables created successfully!';
RAISE NOTICE 'Tables: meeting_recording_consents, recordings, transcriptions, transcription_segments, recording_audit_log, transcription_queue';
RAISE NOTICE 'Views: recording_overview, pending_transcriptions';
RAISE NOTICE 'Functions: search_transcripts, cleanup_expired_recordings';
END $$;

View File

@@ -0,0 +1,410 @@
-- ==============================================
-- Breakpilot Drive - Educational Unit Tables Migration
-- ==============================================
-- Adds tables for the contextual learning unit system.
-- Supports FlightPath and StationLoop templates.
--
-- Execute with:
--   psql -h localhost -U breakpilot -d breakpilot -f add_unit_tables.sql
-- Enable UUID extension if not already enabled (provides uuid_generate_v4()).
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ==============================================
-- Unit Definitions
-- ==============================================
-- Stores the configuration for each learning unit.
-- JSON definition contains stops, interactions, vocab, etc.
-- locale and grade_band are arrays so one unit can target several audiences.
CREATE TABLE IF NOT EXISTS unit_definitions (
unit_id VARCHAR(100) PRIMARY KEY,
template VARCHAR(50) NOT NULL CHECK (template IN ('flight_path', 'station_loop')),
version VARCHAR(20) NOT NULL,
locale VARCHAR(10)[] DEFAULT ARRAY['de-DE'],
grade_band VARCHAR(10)[] DEFAULT ARRAY['5', '6'],
duration_minutes INTEGER NOT NULL CHECK (duration_minutes >= 3 AND duration_minutes <= 20),
difficulty VARCHAR(20) DEFAULT 'base' CHECK (difficulty IN ('base', 'advanced')),
definition JSONB NOT NULL,
is_published BOOLEAN DEFAULT false,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Index for filtering
CREATE INDEX IF NOT EXISTS idx_unit_definitions_template ON unit_definitions(template);
CREATE INDEX IF NOT EXISTS idx_unit_definitions_published ON unit_definitions(is_published);
CREATE INDEX IF NOT EXISTS idx_unit_definitions_locale ON unit_definitions USING GIN(locale);
CREATE INDEX IF NOT EXISTS idx_unit_definitions_grade ON unit_definitions USING GIN(grade_band);
-- Comments
COMMENT ON TABLE unit_definitions IS 'Stores unit configurations for contextual learning experiences';
COMMENT ON COLUMN unit_definitions.template IS 'Unit template type: flight_path (linear) or station_loop (hub-based)';
COMMENT ON COLUMN unit_definitions.definition IS 'Complete JSON definition including stops, interactions, vocab, etc.';
COMMENT ON COLUMN unit_definitions.grade_band IS 'Target grade levels (e.g., ["5", "6", "7"])';
-- ==============================================
-- Unit Sessions
-- ==============================================
-- Records each unit session played by a student.
-- completed_at / aborted_at distinguish finished from abandoned runs.
CREATE TABLE IF NOT EXISTS unit_sessions (
session_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
unit_id VARCHAR(100) NOT NULL REFERENCES unit_definitions(unit_id),
student_id UUID NOT NULL,
locale VARCHAR(10) DEFAULT 'de-DE',
difficulty VARCHAR(20) DEFAULT 'base',
started_at TIMESTAMPTZ DEFAULT NOW(),
completed_at TIMESTAMPTZ,
aborted_at TIMESTAMPTZ,
duration_seconds INTEGER,
completion_rate DECIMAL(3,2) CHECK (completion_rate >= 0 AND completion_rate <= 1),
precheck_score DECIMAL(3,2) CHECK (precheck_score >= 0 AND precheck_score <= 1),
postcheck_score DECIMAL(3,2) CHECK (postcheck_score >= 0 AND postcheck_score <= 1),
stops_completed INTEGER DEFAULT 0,
total_stops INTEGER DEFAULT 0,
session_token VARCHAR(500),
metadata JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_unit_sessions_student ON unit_sessions(student_id);
CREATE INDEX IF NOT EXISTS idx_unit_sessions_unit ON unit_sessions(unit_id);
CREATE INDEX IF NOT EXISTS idx_unit_sessions_completed ON unit_sessions(completed_at);
CREATE INDEX IF NOT EXISTS idx_unit_sessions_started ON unit_sessions(started_at DESC);
-- Comments
COMMENT ON TABLE unit_sessions IS 'Records individual unit playthrough sessions';
COMMENT ON COLUMN unit_sessions.completion_rate IS 'Percentage of stops completed (0.0 to 1.0)';
COMMENT ON COLUMN unit_sessions.precheck_score IS 'Score from pre-unit diagnostic quiz (0.0 to 1.0)';
COMMENT ON COLUMN unit_sessions.postcheck_score IS 'Score from post-unit diagnostic quiz (0.0 to 1.0)';
-- ==============================================
-- Unit Telemetry Events
-- ==============================================
-- Stores detailed telemetry events from unit sessions.
-- Append-only event stream; cascade-deletes with the parent session.
CREATE TABLE IF NOT EXISTS unit_telemetry (
id BIGSERIAL PRIMARY KEY,
session_id UUID NOT NULL REFERENCES unit_sessions(session_id) ON DELETE CASCADE,
event_type VARCHAR(50) NOT NULL,
stop_id VARCHAR(100),
event_timestamp TIMESTAMPTZ DEFAULT NOW(),
metrics JSONB,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for analytics
CREATE INDEX IF NOT EXISTS idx_unit_telemetry_session ON unit_telemetry(session_id);
CREATE INDEX IF NOT EXISTS idx_unit_telemetry_type ON unit_telemetry(event_type);
CREATE INDEX IF NOT EXISTS idx_unit_telemetry_stop ON unit_telemetry(stop_id);
CREATE INDEX IF NOT EXISTS idx_unit_telemetry_timestamp ON unit_telemetry(event_timestamp);
-- Partitioning hint (for production with high volume)
-- Consider partitioning by created_at for older data cleanup
-- Comments
COMMENT ON TABLE unit_telemetry IS 'Detailed telemetry events from unit sessions';
COMMENT ON COLUMN unit_telemetry.event_type IS 'Event type: stop_completed, hint_used, state_change, etc.';
COMMENT ON COLUMN unit_telemetry.metrics IS 'Event-specific metrics in JSON format';
-- ==============================================
-- Unit Stop Metrics
-- ==============================================
-- Aggregated metrics per stop per session.
-- One row per (session, stop), enforced by unique_session_stop.
CREATE TABLE IF NOT EXISTS unit_stop_metrics (
id BIGSERIAL PRIMARY KEY,
session_id UUID NOT NULL REFERENCES unit_sessions(session_id) ON DELETE CASCADE,
stop_id VARCHAR(100) NOT NULL,
completed BOOLEAN DEFAULT false,
success BOOLEAN,
attempts INTEGER DEFAULT 0,
time_seconds DECIMAL(10,2),
hints_used TEXT[],
completed_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
CONSTRAINT unique_session_stop UNIQUE(session_id, stop_id)
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_stop_metrics_session ON unit_stop_metrics(session_id);
CREATE INDEX IF NOT EXISTS idx_stop_metrics_stop ON unit_stop_metrics(stop_id);
CREATE INDEX IF NOT EXISTS idx_stop_metrics_success ON unit_stop_metrics(success);
-- Comments
COMMENT ON TABLE unit_stop_metrics IS 'Aggregated metrics for each stop in a unit session';
-- ==============================================
-- Unit Misconceptions
-- ==============================================
-- Tracks detected misconceptions per student.
-- At most one open row per (student, unit, misconception).
CREATE TABLE IF NOT EXISTS unit_misconceptions (
id BIGSERIAL PRIMARY KEY,
student_id UUID NOT NULL,
unit_id VARCHAR(100) NOT NULL REFERENCES unit_definitions(unit_id),
misconception_id VARCHAR(100) NOT NULL,
stop_id VARCHAR(100),
detected_at TIMESTAMPTZ DEFAULT NOW(),
addressed BOOLEAN DEFAULT false,
addressed_at TIMESTAMPTZ,
session_id UUID REFERENCES unit_sessions(session_id),
CONSTRAINT unique_student_misconception UNIQUE(student_id, unit_id, misconception_id)
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_misconceptions_student ON unit_misconceptions(student_id);
CREATE INDEX IF NOT EXISTS idx_misconceptions_unit ON unit_misconceptions(unit_id);
CREATE INDEX IF NOT EXISTS idx_misconceptions_addressed ON unit_misconceptions(addressed);
-- Comments
COMMENT ON TABLE unit_misconceptions IS 'Tracks detected misconceptions for targeted remediation';
-- ==============================================
-- Trigger: Update updated_at timestamp
-- ==============================================
-- Reuse existing function if available, otherwise create
-- NOTE(review): the pg_proc check matches by name only, across all schemas --
-- a same-named function in another schema would suppress creation here;
-- confirm a search_path-qualified check isn't needed.
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_proc WHERE proname = 'update_updated_at_column') THEN
CREATE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $func$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$func$ LANGUAGE plpgsql;
END IF;
END $$;
-- Apply triggers
DROP TRIGGER IF EXISTS update_unit_definitions_updated_at ON unit_definitions;
CREATE TRIGGER update_unit_definitions_updated_at
BEFORE UPDATE ON unit_definitions
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS update_unit_sessions_updated_at ON unit_sessions;
CREATE TRIGGER update_unit_sessions_updated_at
BEFORE UPDATE ON unit_sessions
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- ==============================================
-- View: Unit Session Summary
-- ==============================================
-- One row per session with the learning gain (post - pre) precomputed.
CREATE OR REPLACE VIEW unit_session_summary AS
SELECT
us.session_id,
us.unit_id,
ud.template,
us.student_id,
us.started_at,
us.completed_at,
us.duration_seconds,
us.completion_rate,
us.precheck_score,
us.postcheck_score,
-- Learning gain is only defined when both check scores are present.
CASE
WHEN us.precheck_score IS NOT NULL AND us.postcheck_score IS NOT NULL
THEN us.postcheck_score - us.precheck_score
ELSE NULL
END as learning_gain,
us.stops_completed,
us.total_stops,
CASE WHEN us.completed_at IS NOT NULL THEN true ELSE false END as is_completed
FROM unit_sessions us
JOIN unit_definitions ud ON us.unit_id = ud.unit_id;
COMMENT ON VIEW unit_session_summary IS 'Summary view of unit sessions with learning gain calculation';
-- ==============================================
-- View: Unit Analytics by Student
-- ==============================================
-- NOTE(review): SUM(duration_seconds) / 60 is integer division, so
-- total_minutes_played truncates partial minutes -- confirm that is intended.
CREATE OR REPLACE VIEW unit_student_analytics AS
SELECT
student_id,
COUNT(DISTINCT unit_id) as units_attempted,
COUNT(*) as total_sessions,
COUNT(*) FILTER (WHERE completed_at IS NOT NULL) as completed_sessions,
AVG(completion_rate) as avg_completion_rate,
AVG(precheck_score) as avg_precheck_score,
AVG(postcheck_score) as avg_postcheck_score,
AVG(CASE
WHEN precheck_score IS NOT NULL AND postcheck_score IS NOT NULL
THEN postcheck_score - precheck_score
ELSE NULL
END) as avg_learning_gain,
SUM(duration_seconds) / 60 as total_minutes_played,
MAX(completed_at) as last_completed_at
FROM unit_sessions
GROUP BY student_id;
COMMENT ON VIEW unit_student_analytics IS 'Aggregated analytics per student across all units';
-- ==============================================
-- View: Unit Performance
-- ==============================================
-- Per-unit funnel metrics; NULLIF guards the divide-by-zero for units
-- that have never been played (LEFT JOIN keeps them in the result).
CREATE OR REPLACE VIEW unit_performance AS
SELECT
ud.unit_id,
ud.template,
ud.difficulty,
COUNT(us.session_id) as total_sessions,
COUNT(*) FILTER (WHERE us.completed_at IS NOT NULL) as completed_sessions,
ROUND(
COUNT(*) FILTER (WHERE us.completed_at IS NOT NULL)::DECIMAL /
NULLIF(COUNT(us.session_id), 0) * 100,
1
) as completion_percent,
AVG(us.duration_seconds) / 60 as avg_duration_minutes,
AVG(us.completion_rate) as avg_completion_rate,
AVG(CASE
WHEN us.precheck_score IS NOT NULL AND us.postcheck_score IS NOT NULL
THEN us.postcheck_score - us.precheck_score
ELSE NULL
END) as avg_learning_gain
FROM unit_definitions ud
LEFT JOIN unit_sessions us ON ud.unit_id = us.unit_id
GROUP BY ud.unit_id, ud.template, ud.difficulty;
COMMENT ON VIEW unit_performance IS 'Performance metrics per unit for content optimization';
-- ==============================================
-- Function: Get Recommended Units for Student
-- ==============================================
-- Recommends up to p_limit published units for a student, filtered by
-- locale and (optionally) grade, prioritised as:
--   1. never played   2. started but not completed
--   3. completed with a weak postcheck score (< 0.6)   4. completed
-- Fix: the "weak postcheck" tests (CASE branch and its ORDER BY twin) now
-- require completed_at IS NOT NULL; previously an aborted run with a low
-- postcheck_score could flag a unit for repetition even though the student
-- had since completed it successfully.
CREATE OR REPLACE FUNCTION get_recommended_units(
p_student_id UUID,
p_grade VARCHAR(10) DEFAULT NULL,
p_locale VARCHAR(10) DEFAULT 'de-DE',
p_limit INTEGER DEFAULT 5
)
RETURNS TABLE (
unit_id VARCHAR(100),
template VARCHAR(50),
difficulty VARCHAR(20),
reason TEXT
) AS $$
BEGIN
RETURN QUERY
SELECT
ud.unit_id,
ud.template,
ud.difficulty,
CASE
-- Never played
WHEN NOT EXISTS (
SELECT 1 FROM unit_sessions us
WHERE us.student_id = p_student_id AND us.unit_id = ud.unit_id
) THEN 'Neu: Noch nicht gespielt'
-- Played but not completed
WHEN NOT EXISTS (
SELECT 1 FROM unit_sessions us
WHERE us.student_id = p_student_id
AND us.unit_id = ud.unit_id
AND us.completed_at IS NOT NULL
) THEN 'Fortsetzen: Noch nicht abgeschlossen'
-- Completed with low postcheck score (only completed runs count)
WHEN EXISTS (
SELECT 1 FROM unit_sessions us
WHERE us.student_id = p_student_id
AND us.unit_id = ud.unit_id
AND us.completed_at IS NOT NULL
AND us.postcheck_score < 0.6
) THEN 'Wiederholen: Verständnis vertiefen'
ELSE 'Abgeschlossen'
END as reason
FROM unit_definitions ud
WHERE ud.is_published = true
AND (p_locale = ANY(ud.locale) OR p_locale IS NULL)
AND (p_grade = ANY(ud.grade_band) OR p_grade IS NULL)
ORDER BY
-- Prioritize: new > incomplete > low score > completed
CASE
WHEN NOT EXISTS (
SELECT 1 FROM unit_sessions us
WHERE us.student_id = p_student_id AND us.unit_id = ud.unit_id
) THEN 1
WHEN NOT EXISTS (
SELECT 1 FROM unit_sessions us
WHERE us.student_id = p_student_id
AND us.unit_id = ud.unit_id
AND us.completed_at IS NOT NULL
) THEN 2
WHEN EXISTS (
SELECT 1 FROM unit_sessions us
WHERE us.student_id = p_student_id
AND us.unit_id = ud.unit_id
AND us.completed_at IS NOT NULL
AND us.postcheck_score < 0.6
) THEN 3
ELSE 4
END,
ud.unit_id
LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION get_recommended_units IS 'Returns recommended units for a student based on completion status';
-- ==============================================
-- Sample Data: Demo Unit Definition
-- ==============================================
-- Insert a demo unit for testing (will be replaced by real content)
-- Seed one published demo unit so the pipeline can be exercised end-to-end.
-- Idempotent: re-running the migration upserts the JSONB definition in place
-- (ON CONFLICT on the unit_id primary/unique key) instead of failing.
INSERT INTO unit_definitions (unit_id, template, version, locale, grade_band, duration_minutes, difficulty, definition, is_published)
VALUES (
    'demo_unit_v1',
    'flight_path',
    '1.0.0',
    ARRAY['de-DE'],
    ARRAY['5', '6', '7'],
    5,
    'base',
    -- Inline unit definition: three "aim_and_pass" stops with de-DE labels.
    -- NOTE(review): presumably validated elsewhere against the unit schema —
    -- nothing here enforces the JSON structure beyond being valid JSONB.
    '{
        "unit_id": "demo_unit_v1",
        "template": "flight_path",
        "version": "1.0.0",
        "learning_objectives": ["Demo: Grundfunktion testen", "Demo: Navigation verstehen"],
        "stops": [
            {"stop_id": "stop_1", "label": {"de-DE": "Start"}, "interaction": {"type": "aim_and_pass"}},
            {"stop_id": "stop_2", "label": {"de-DE": "Mitte"}, "interaction": {"type": "aim_and_pass"}},
            {"stop_id": "stop_3", "label": {"de-DE": "Ende"}, "interaction": {"type": "aim_and_pass"}}
        ],
        "teacher_controls": {"allow_skip": true, "allow_replay": true}
    }'::jsonb,
    true
)
ON CONFLICT (unit_id) DO UPDATE SET
    definition = EXCLUDED.definition,
    updated_at = NOW();
-- ==============================================
-- Grant Permissions (adjust user as needed)
-- ==============================================
-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO breakpilot;
-- GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO breakpilot;
-- GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO breakpilot;
-- ==============================================
-- Migration Complete Message
-- ==============================================
-- Emit a summary of what this migration created; NOTICE lines show up in the
-- client/log output so operators can confirm the script ran to completion.
DO $$
BEGIN
    RAISE NOTICE 'Breakpilot Drive unit tables created successfully!';
    RAISE NOTICE 'Tables: unit_definitions, unit_sessions, unit_telemetry, unit_stop_metrics, unit_misconceptions';
    RAISE NOTICE 'Views: unit_session_summary, unit_student_analytics, unit_performance';
    RAISE NOTICE 'Functions: get_recommended_units';
END $$;