Files
breakpilot-lehrer/edu-search-service/internal/policy/models.go
Benjamin Boenisch 414e0f5ec0
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
feat: edu-search-service migriert, voice-service/geo-service entfernt
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt
  (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 18:36:38 +01:00

446 lines
18 KiB
Go

// Package policy provides whitelist-based data source management for the edu-search-service.
// It implements source policies, operation permissions, PII detection, and audit logging
// for compliance with German data protection regulations.
package policy
import (
"encoding/json"
"time"
"github.com/google/uuid"
)
// =============================================================================
// ENUMS AND CONSTANTS
// =============================================================================
// Bundesland is the two-letter code of a German federal state.
type Bundesland string

// One constant per German federal state; the trailing comment on each line
// gives the name of the state.
const (
	BundeslandBW Bundesland = "BW" // Baden-Wuerttemberg
	BundeslandBY Bundesland = "BY" // Bayern
	BundeslandBE Bundesland = "BE" // Berlin
	BundeslandBB Bundesland = "BB" // Brandenburg
	BundeslandHB Bundesland = "HB" // Bremen
	BundeslandHH Bundesland = "HH" // Hamburg
	BundeslandHE Bundesland = "HE" // Hessen
	BundeslandMV Bundesland = "MV" // Mecklenburg-Vorpommern
	BundeslandNI Bundesland = "NI" // Niedersachsen
	BundeslandNW Bundesland = "NW" // Nordrhein-Westfalen
	BundeslandRP Bundesland = "RP" // Rheinland-Pfalz
	BundeslandSL Bundesland = "SL" // Saarland
	BundeslandSN Bundesland = "SN" // Sachsen
	BundeslandST Bundesland = "ST" // Sachsen-Anhalt
	BundeslandSH Bundesland = "SH" // Schleswig-Holstein
	BundeslandTH Bundesland = "TH" // Thueringen
)

// ValidBundeslaender enumerates all sixteen Bundesland codes, in the same
// order in which the constants are declared.
var ValidBundeslaender = []Bundesland{
	BundeslandBW,
	BundeslandBY,
	BundeslandBE,
	BundeslandBB,
	BundeslandHB,
	BundeslandHH,
	BundeslandHE,
	BundeslandMV,
	BundeslandNI,
	BundeslandNW,
	BundeslandRP,
	BundeslandSL,
	BundeslandSN,
	BundeslandST,
	BundeslandSH,
	BundeslandTH,
}
// License names a license type under which a data source may be used.
type License string

// License identifiers recognised by this package.
const (
	LicenseDLDEBY20   License = "DL-DE-BY-2.0" // Datenlizenz Deutschland - Namensnennung (attribution)
	LicenseCCBY       License = "CC-BY"        // Creative Commons Attribution
	LicenseCCBYSA     License = "CC-BY-SA"     // Creative Commons Attribution-ShareAlike
	LicenseCCBYNC     License = "CC-BY-NC"     // Creative Commons Attribution-NonCommercial
	LicenseCCBYNCSA   License = "CC-BY-NC-SA"  // Creative Commons Attribution-NonCommercial-ShareAlike
	LicenseCC0        License = "CC0"          // Public domain
	LicenseParagraph5 License = "§5 UrhG"      // Official works (Amtliche Werke), German Copyright Act
	LicenseCustom     License = "Custom"       // Custom license (requires a legal basis)
)
// Operation identifies a kind of operation that can be performed on data.
type Operation string

// Operation kinds known to this package.
const (
	OperationLookup   Operation = "lookup"   // Display/search
	OperationRAG      Operation = "rag"      // Retrieval-augmented generation
	OperationTraining Operation = "training" // Model training (noted as forbidden by default)
	OperationExport   Operation = "export"   // Data export
)

// ValidOperations enumerates every Operation constant in declaration order.
var ValidOperations = []Operation{OperationLookup, OperationRAG, OperationTraining, OperationExport}
// PIIRuleType selects how the pattern of a PII detection rule is interpreted.
type PIIRuleType string

// Supported PII rule types.
const (
	PIIRuleTypeRegex   PIIRuleType = "regex"   // Regular expression pattern
	PIIRuleTypeKeyword PIIRuleType = "keyword" // Keyword matching
)
// PIISeverity is the severity level attached to a PII match.
type PIISeverity string

// Supported severity levels.
const (
	PIISeverityBlock  PIISeverity = "block"  // Block the content completely
	PIISeverityWarn   PIISeverity = "warn"   // Warn, but allow the content
	PIISeverityRedact PIISeverity = "redact" // Redact the matched content
)
// AuditAction is the kind of action recorded in an audit trail entry.
type AuditAction string

// Audit actions known to this package.
const (
	AuditActionCreate     AuditAction = "create"
	AuditActionUpdate     AuditAction = "update"
	AuditActionDelete     AuditAction = "delete"
	AuditActionActivate   AuditAction = "activate"
	AuditActionDeactivate AuditAction = "deactivate"
	AuditActionApprove    AuditAction = "approve"
)
// AuditEntityType is the kind of entity an audit trail entry refers to.
type AuditEntityType string

// Entity kinds that can appear in the audit trail.
const (
	AuditEntitySourcePolicy        AuditEntityType = "source_policy"
	AuditEntityAllowedSource       AuditEntityType = "allowed_source"
	AuditEntityOperationPermission AuditEntityType = "operation_permission"
	AuditEntityPIIRule             AuditEntityType = "pii_rule"
)
// BlockReason states why a piece of content was blocked.
type BlockReason string

// Block reasons known to this package.
const (
	BlockReasonNotWhitelisted    BlockReason = "not_whitelisted"
	BlockReasonPIIDetected       BlockReason = "pii_detected"
	BlockReasonTrainingForbidden BlockReason = "training_forbidden"
	BlockReasonLicenseViolation  BlockReason = "license_violation"
	BlockReasonManualBlock       BlockReason = "manual_block"
)
// =============================================================================
// CORE MODELS
// =============================================================================
// SourcePolicy is a versioned policy for data source management.
//
// A policy is either scoped to one federal state (Bundesland set) or applies
// federally (Bundesland nil). Optional columns are modelled as pointers and
// are left out of the JSON encoding when nil.
type SourcePolicy struct {
	ID          uuid.UUID   `json:"id" db:"id"`
	Version     int         `json:"version" db:"version"`
	Name        string      `json:"name" db:"name"`
	Description *string     `json:"description,omitempty" db:"description"`
	Bundesland  *Bundesland `json:"bundesland,omitempty" db:"bundesland"`
	IsActive    bool        `json:"is_active" db:"is_active"`
	CreatedAt   time.Time   `json:"created_at" db:"created_at"`
	UpdatedAt   time.Time   `json:"updated_at" db:"updated_at"`
	ApprovedBy  *uuid.UUID  `json:"approved_by,omitempty" db:"approved_by"`
	ApprovedAt  *time.Time  `json:"approved_at,omitempty" db:"approved_at"`

	// Sources is a joined field populated by queries; it has no db tag of
	// its own.
	Sources []AllowedSource `json:"sources,omitempty"`
}
// AllowedSource is a whitelisted data source together with its license
// information. PolicyID links the source to the policy it belongs to.
type AllowedSource struct {
	ID               uuid.UUID `json:"id" db:"id"`
	PolicyID         uuid.UUID `json:"policy_id" db:"policy_id"`
	Domain           string    `json:"domain" db:"domain"`
	Name             string    `json:"name" db:"name"`
	Description      *string   `json:"description,omitempty" db:"description"`
	License          License   `json:"license" db:"license"`
	LegalBasis       *string   `json:"legal_basis,omitempty" db:"legal_basis"`
	CitationTemplate *string   `json:"citation_template,omitempty" db:"citation_template"`
	TrustBoost       float64   `json:"trust_boost" db:"trust_boost"`
	IsActive         bool      `json:"is_active" db:"is_active"`
	CreatedAt        time.Time `json:"created_at" db:"created_at"`
	UpdatedAt        time.Time `json:"updated_at" db:"updated_at"`

	// Joined fields populated by queries; neither carries a db tag.
	Operations []OperationPermission `json:"operations,omitempty"`
	PolicyName *string               `json:"policy_name,omitempty"`
}
// OperationPermission is one entry of the permission matrix of a source: it
// records, for the source identified by SourceID, the flags stored for a
// single Operation.
type OperationPermission struct {
	ID               uuid.UUID `json:"id" db:"id"`
	SourceID         uuid.UUID `json:"source_id" db:"source_id"`
	Operation        Operation `json:"operation" db:"operation"`
	IsAllowed        bool      `json:"is_allowed" db:"is_allowed"`
	RequiresCitation bool      `json:"requires_citation" db:"requires_citation"`
	Notes            *string   `json:"notes,omitempty" db:"notes"`
	CreatedAt        time.Time `json:"created_at" db:"created_at"`
	UpdatedAt        time.Time `json:"updated_at" db:"updated_at"`
}
// PIIRule is a rule for detecting personally identifiable information.
// RuleType selects how Pattern is interpreted and Severity is the level
// attached to the rule.
type PIIRule struct {
	ID          uuid.UUID   `json:"id" db:"id"`
	Name        string      `json:"name" db:"name"`
	Description *string     `json:"description,omitempty" db:"description"`
	RuleType    PIIRuleType `json:"rule_type" db:"rule_type"`
	Pattern     string      `json:"pattern" db:"pattern"`
	Severity    PIISeverity `json:"severity" db:"severity"`
	IsActive    bool        `json:"is_active" db:"is_active"`
	CreatedAt   time.Time   `json:"created_at" db:"created_at"`
	UpdatedAt   time.Time   `json:"updated_at" db:"updated_at"`
}
// =============================================================================
// AUDIT AND LOGGING MODELS
// =============================================================================
// PolicyAuditLog is an immutable audit log entry describing a policy change.
//
// OldValue and NewValue hold raw, undecoded JSON. The entity ID and all
// user-related fields are optional and are omitted from the JSON encoding
// when unset.
type PolicyAuditLog struct {
	ID         uuid.UUID       `json:"id" db:"id"`
	Action     AuditAction     `json:"action" db:"action"`
	EntityType AuditEntityType `json:"entity_type" db:"entity_type"`
	EntityID   *uuid.UUID      `json:"entity_id,omitempty" db:"entity_id"`
	OldValue   json.RawMessage `json:"old_value,omitempty" db:"old_value"`
	NewValue   json.RawMessage `json:"new_value,omitempty" db:"new_value"`
	UserID     *uuid.UUID      `json:"user_id,omitempty" db:"user_id"`
	UserEmail  *string         `json:"user_email,omitempty" db:"user_email"`
	IPAddress  *string         `json:"ip_address,omitempty" db:"ip_address"`
	UserAgent  *string         `json:"user_agent,omitempty" db:"user_agent"`
	CreatedAt  time.Time       `json:"created_at" db:"created_at"`
}
// BlockedContentLog is a log entry for a blocked URL. It records the URL,
// its domain and the reason for the block; MatchedRuleID and Details are
// optional, and Details holds raw, undecoded JSON.
type BlockedContentLog struct {
	ID            uuid.UUID       `json:"id" db:"id"`
	URL           string          `json:"url" db:"url"`
	Domain        string          `json:"domain" db:"domain"`
	BlockReason   BlockReason     `json:"block_reason" db:"block_reason"`
	MatchedRuleID *uuid.UUID      `json:"matched_rule_id,omitempty" db:"matched_rule_id"`
	Details       json.RawMessage `json:"details,omitempty" db:"details"`
	CreatedAt     time.Time       `json:"created_at" db:"created_at"`
}
// =============================================================================
// REQUEST/RESPONSE MODELS
// =============================================================================
// CreateSourcePolicyRequest is the request body for creating a new policy.
// Only Name is tagged binding:"required"; Description and Bundesland are
// pointers and decode to nil when absent from the JSON body.
type CreateSourcePolicyRequest struct {
	Name        string      `json:"name" binding:"required"`
	Description *string     `json:"description"`
	Bundesland  *Bundesland `json:"bundesland"`
}
// UpdateSourcePolicyRequest is the request body for updating a policy.
// Every field is a pointer, so a field that is absent from the JSON body
// decodes to nil (presumably meaning "leave unchanged" — confirm against the
// handler that consumes this type).
type UpdateSourcePolicyRequest struct {
	Name        *string     `json:"name"`
	Description *string     `json:"description"`
	Bundesland  *Bundesland `json:"bundesland"`
	IsActive    *bool       `json:"is_active"`
}
// CreateAllowedSourceRequest is the request body for creating a new allowed
// source. PolicyID, Domain, Name and License are tagged binding:"required";
// the remaining fields are pointers and decode to nil when absent.
type CreateAllowedSourceRequest struct {
	PolicyID         uuid.UUID `json:"policy_id" binding:"required"`
	Domain           string    `json:"domain" binding:"required"`
	Name             string    `json:"name" binding:"required"`
	Description      *string   `json:"description"`
	License          License   `json:"license" binding:"required"`
	LegalBasis       *string   `json:"legal_basis"`
	CitationTemplate *string   `json:"citation_template"`
	TrustBoost       *float64  `json:"trust_boost"`
}
// UpdateAllowedSourceRequest is the request body for updating an allowed
// source. Every field is a pointer, so a field that is absent from the JSON
// body decodes to nil (presumably meaning "leave unchanged" — confirm against
// the handler that consumes this type).
type UpdateAllowedSourceRequest struct {
	Domain           *string  `json:"domain"`
	Name             *string  `json:"name"`
	Description      *string  `json:"description"`
	License          *License `json:"license"`
	LegalBasis       *string  `json:"legal_basis"`
	CitationTemplate *string  `json:"citation_template"`
	TrustBoost       *float64 `json:"trust_boost"`
	IsActive         *bool    `json:"is_active"`
}
// UpdateOperationPermissionRequest is the request body for updating an
// operation permission. All three fields are pointers, so a field that is
// absent from the JSON body decodes to nil and can be told apart from an
// explicit false or empty string.
type UpdateOperationPermissionRequest struct {
	IsAllowed        *bool   `json:"is_allowed"`
	RequiresCitation *bool   `json:"requires_citation"`
	Notes            *string `json:"notes"`
}
// CreatePIIRuleRequest is the request body for creating a new PII rule.
// Name, RuleType and Pattern are tagged binding:"required"; Severity is not,
// so it decodes to the empty string when absent from the JSON body.
type CreatePIIRuleRequest struct {
	Name        string      `json:"name" binding:"required"`
	Description *string     `json:"description"`
	RuleType    PIIRuleType `json:"rule_type" binding:"required"`
	Pattern     string      `json:"pattern" binding:"required"`
	Severity    PIISeverity `json:"severity"`
}
// UpdatePIIRuleRequest is the request body for updating a PII rule. Every
// field is a pointer, so a field that is absent from the JSON body decodes
// to nil (presumably meaning "leave unchanged" — confirm against the handler
// that consumes this type).
type UpdatePIIRuleRequest struct {
	Name        *string      `json:"name"`
	Description *string      `json:"description"`
	RuleType    *PIIRuleType `json:"rule_type"`
	Pattern     *string      `json:"pattern"`
	Severity    *PIISeverity `json:"severity"`
	IsActive    *bool        `json:"is_active"`
}
// CheckComplianceRequest is the request body for checking the compliance of
// a URL for a given operation. URL and Operation are tagged
// binding:"required"; Bundesland is optional and decodes to nil when absent.
type CheckComplianceRequest struct {
	URL        string      `json:"url" binding:"required"`
	Operation  Operation   `json:"operation" binding:"required"`
	Bundesland *Bundesland `json:"bundesland"`
}
// CheckComplianceResponse is the result of a compliance check. The two
// boolean fields are always encoded; the pointer fields are omitted from the
// JSON encoding when nil.
type CheckComplianceResponse struct {
	IsAllowed        bool           `json:"is_allowed"`
	Source           *AllowedSource `json:"source,omitempty"`
	BlockReason      *BlockReason   `json:"block_reason,omitempty"`
	RequiresCitation bool           `json:"requires_citation"`
	CitationTemplate *string        `json:"citation_template,omitempty"`
	License          *License       `json:"license,omitempty"`
}
// PIITestRequest is the request body for testing PII detection. Its single
// field, Text, is tagged binding:"required".
type PIITestRequest struct {
	Text string `json:"text" binding:"required"`
}
// PIIMatch is a single PII match found in a text. It identifies the rule
// that matched (ID, name, type, severity), the matched text and its
// position.
//
// NOTE(review): whether StartIndex/EndIndex are byte or rune offsets, and
// whether EndIndex is exclusive, is not visible from this declaration —
// confirm against the detector that fills it in.
type PIIMatch struct {
	RuleID     uuid.UUID   `json:"rule_id"`
	RuleName   string      `json:"rule_name"`
	RuleType   PIIRuleType `json:"rule_type"`
	Severity   PIISeverity `json:"severity"`
	Match      string      `json:"match"`
	StartIndex int         `json:"start_index"`
	EndIndex   int         `json:"end_index"`
}
// PIITestResponse is the result of a PII detection test.
//
// Matches has no omitempty option, so a nil slice is encoded as JSON null
// rather than as an empty array.
type PIITestResponse struct {
	HasPII      bool        `json:"has_pii"`
	Matches     []PIIMatch  `json:"matches"`
	BlockLevel  PIISeverity `json:"block_level"`
	ShouldBlock bool        `json:"should_block"`
}
// =============================================================================
// LIST/FILTER MODELS
// =============================================================================
// PolicyListFilter holds the filter and paging parameters for listing
// policies. Fields are bound through their form tags; the pointer fields are
// optional filters, while Limit and Offset are plain ints.
type PolicyListFilter struct {
	Bundesland *Bundesland `form:"bundesland"`
	IsActive   *bool       `form:"is_active"`
	Limit      int         `form:"limit"`
	Offset     int         `form:"offset"`
}
// SourceListFilter holds the filter and paging parameters for listing
// sources. Fields are bound through their form tags; the pointer fields are
// optional filters, while Limit and Offset are plain ints.
//
// NOTE(review): PolicyID is a *uuid.UUID behind a form tag — confirm that
// the form binder in use can parse a UUID from a single query value.
type SourceListFilter struct {
	PolicyID *uuid.UUID `form:"policy_id"`
	Domain   *string    `form:"domain"`
	License  *License   `form:"license"`
	IsActive *bool      `form:"is_active"`
	Limit    int        `form:"limit"`
	Offset   int        `form:"offset"`
}
// AuditLogFilter holds the filter and paging parameters for querying audit
// logs. Fields are bound through their form tags; note that FromDate and
// ToDate are bound from the parameters "from" and "to".
//
// NOTE(review): EntityID is a *uuid.UUID behind a form tag — confirm that
// the form binder in use can parse a UUID from a single query value.
type AuditLogFilter struct {
	EntityType *AuditEntityType `form:"entity_type"`
	EntityID   *uuid.UUID       `form:"entity_id"`
	Action     *AuditAction     `form:"action"`
	UserEmail  *string          `form:"user_email"`
	FromDate   *time.Time       `form:"from"`
	ToDate     *time.Time       `form:"to"`
	Limit      int              `form:"limit"`
	Offset     int              `form:"offset"`
}
// BlockedContentFilter holds the filter and paging parameters for querying
// blocked content logs. Fields are bound through their form tags; note that
// FromDate and ToDate are bound from the parameters "from" and "to".
type BlockedContentFilter struct {
	Domain      *string      `form:"domain"`
	BlockReason *BlockReason `form:"block_reason"`
	FromDate    *time.Time   `form:"from"`
	ToDate      *time.Time   `form:"to"`
	Limit       int          `form:"limit"`
	Offset      int          `form:"offset"`
}
// =============================================================================
// STATISTICS MODELS
// =============================================================================
// PolicyStats carries aggregated statistics for the policy system: plain
// counters, two string-keyed breakdown maps and a compliance score.
//
// Neither map has an omitempty option, so a nil map is encoded as JSON null.
type PolicyStats struct {
	ActivePolicies   int            `json:"active_policies"`
	TotalSources     int            `json:"total_sources"`
	ActiveSources    int            `json:"active_sources"`
	BlockedToday     int            `json:"blocked_today"`
	BlockedTotal     int            `json:"blocked_total"`
	PIIRulesActive   int            `json:"pii_rules_active"`
	SourcesByLicense map[string]int `json:"sources_by_license"`
	BlocksByReason   map[string]int `json:"blocks_by_reason"`
	ComplianceScore  float64        `json:"compliance_score"`
}
// =============================================================================
// YAML CONFIGURATION MODELS
// =============================================================================
// BundeslaenderConfig is the top-level shape of the YAML configuration used
// for initial data loading.
//
// Bundeslaender is tagged as an inline map. Assuming the usual gopkg.in/yaml
// semantics (confirm against the YAML library in use), top-level keys not
// claimed by the other fields are collected into this map.
type BundeslaenderConfig struct {
	Federal           PolicyConfig            `yaml:"federal"`
	Bundeslaender     map[string]PolicyConfig `yaml:",inline"`
	DefaultOperations OperationsConfig        `yaml:"default_operations"`
	PIIRules          []PIIRuleConfig         `yaml:"pii_rules"`
}
// PolicyConfig is the YAML form of one policy: its name and the list of
// sources configured for it.
type PolicyConfig struct {
	Name    string         `yaml:"name"`
	Sources []SourceConfig `yaml:"sources"`
}
// SourceConfig is the YAML form of one source. Unlike the database models in
// this file, every text field — License included — is a plain string here,
// and the optional fields are values tagged omitempty rather than pointers.
type SourceConfig struct {
	Domain           string  `yaml:"domain"`
	Name             string  `yaml:"name"`
	License          string  `yaml:"license"`
	LegalBasis       string  `yaml:"legal_basis,omitempty"`
	CitationTemplate string  `yaml:"citation_template,omitempty"`
	TrustBoost       float64 `yaml:"trust_boost,omitempty"`
}
// OperationsConfig is the YAML form of the default operation permissions,
// with one OperationConfig entry per operation: lookup, rag, training and
// export.
type OperationsConfig struct {
	Lookup   OperationConfig `yaml:"lookup"`
	RAG      OperationConfig `yaml:"rag"`
	Training OperationConfig `yaml:"training"`
	Export   OperationConfig `yaml:"export"`
}
// OperationConfig is the YAML form of the permission for a single operation:
// an allowed flag and a requires-citation flag.
type OperationConfig struct {
	Allowed          bool `yaml:"allowed"`
	RequiresCitation bool `yaml:"requires_citation"`
}
// PIIRuleConfig is the YAML form of one PII rule. Type and Severity are
// plain strings here rather than the PIIRuleType and PIISeverity types used
// by the database model.
type PIIRuleConfig struct {
	Name     string `yaml:"name"`
	Type     string `yaml:"type"`
	Pattern  string `yaml:"pattern"`
	Severity string `yaml:"severity"`
}