All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
446 lines
18 KiB
Go
446 lines
18 KiB
Go
// Package policy provides whitelist-based data source management for the edu-search-service.
|
|
// It implements source policies, operation permissions, PII detection, and audit logging
|
|
// for compliance with German data protection regulations.
|
|
package policy
|
|
|
|
import (
|
|
"encoding/json"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// =============================================================================
|
|
// ENUMS AND CONSTANTS
|
|
// =============================================================================
|
|
|
|
// Bundesland is the two-letter code of a German federal state.
type Bundesland string

// Codes of the sixteen German federal states.
const (
	BundeslandBW Bundesland = "BW" // Baden-Wuerttemberg
	BundeslandBY Bundesland = "BY" // Bayern
	BundeslandBE Bundesland = "BE" // Berlin
	BundeslandBB Bundesland = "BB" // Brandenburg
	BundeslandHB Bundesland = "HB" // Bremen
	BundeslandHH Bundesland = "HH" // Hamburg
	BundeslandHE Bundesland = "HE" // Hessen
	BundeslandMV Bundesland = "MV" // Mecklenburg-Vorpommern
	BundeslandNI Bundesland = "NI" // Niedersachsen
	BundeslandNW Bundesland = "NW" // Nordrhein-Westfalen
	BundeslandRP Bundesland = "RP" // Rheinland-Pfalz
	BundeslandSL Bundesland = "SL" // Saarland
	BundeslandSN Bundesland = "SN" // Sachsen
	BundeslandST Bundesland = "ST" // Sachsen-Anhalt
	BundeslandSH Bundesland = "SH" // Schleswig-Holstein
	BundeslandTH Bundesland = "TH" // Thueringen
)

// ValidBundeslaender lists every Bundesland constant declared above, in
// declaration order.
var ValidBundeslaender = []Bundesland{
	BundeslandBW, BundeslandBY, BundeslandBE, BundeslandBB,
	BundeslandHB, BundeslandHH, BundeslandHE, BundeslandMV,
	BundeslandNI, BundeslandNW, BundeslandRP, BundeslandSL,
	BundeslandSN, BundeslandST, BundeslandSH, BundeslandTH,
}
|
|
|
|
// License identifies the license under which a data source may be used.
type License string

// License values accepted for data sources.
const (
	LicenseDLDEBY20   License = "DL-DE-BY-2.0" // Datenlizenz Deutschland - attribution
	LicenseCCBY       License = "CC-BY"        // Creative Commons Attribution
	LicenseCCBYSA     License = "CC-BY-SA"     // Creative Commons Attribution-ShareAlike
	LicenseCCBYNC     License = "CC-BY-NC"     // Creative Commons Attribution-NonCommercial
	LicenseCCBYNCSA   License = "CC-BY-NC-SA"  // Creative Commons Attribution-NonCommercial-ShareAlike
	LicenseCC0        License = "CC0"          // Public domain
	LicenseParagraph5 License = "§5 UrhG"      // Official works (German Copyright Act)
	LicenseCustom     License = "Custom"       // Custom license (requires a legal basis)
)
|
|
|
|
// Operation names a kind of processing that can be performed on data.
type Operation string

// Operation values.
const (
	OperationLookup   Operation = "lookup"   // Display/search
	OperationRAG      Operation = "rag"      // Retrieval-augmented generation
	OperationTraining Operation = "training" // Model training (forbidden by default)
	OperationExport   Operation = "export"   // Data export
)

// ValidOperations lists every Operation constant declared above, in
// declaration order.
var ValidOperations = []Operation{
	OperationLookup,
	OperationRAG,
	OperationTraining,
	OperationExport,
}
|
|
|
|
// PIIRuleType selects how a PII detection rule's pattern is interpreted.
type PIIRuleType string

// PIIRuleType values.
const (
	PIIRuleTypeRegex   PIIRuleType = "regex"   // Regular expression pattern
	PIIRuleTypeKeyword PIIRuleType = "keyword" // Keyword matching
)
|
|
|
|
// PIISeverity is the severity level attached to a PII match.
type PIISeverity string

// PIISeverity values.
const (
	PIISeverityBlock  PIISeverity = "block"  // Block content completely
	PIISeverityWarn   PIISeverity = "warn"   // Warn but allow
	PIISeverityRedact PIISeverity = "redact" // Redact matched content
)
|
|
|
|
// AuditAction is the kind of action recorded in the audit trail.
type AuditAction string

// AuditAction values.
const (
	AuditActionCreate     AuditAction = "create"
	AuditActionUpdate     AuditAction = "update"
	AuditActionDelete     AuditAction = "delete"
	AuditActionActivate   AuditAction = "activate"
	AuditActionDeactivate AuditAction = "deactivate"
	AuditActionApprove    AuditAction = "approve"
)
|
|
|
|
// AuditEntityType is the kind of entity an audit entry refers to.
type AuditEntityType string

// AuditEntityType values.
const (
	AuditEntitySourcePolicy        AuditEntityType = "source_policy"
	AuditEntityAllowedSource       AuditEntityType = "allowed_source"
	AuditEntityOperationPermission AuditEntityType = "operation_permission"
	AuditEntityPIIRule             AuditEntityType = "pii_rule"
)
|
|
|
|
// BlockReason is the reason why content was blocked.
type BlockReason string

// BlockReason values.
const (
	BlockReasonNotWhitelisted    BlockReason = "not_whitelisted"
	BlockReasonPIIDetected       BlockReason = "pii_detected"
	BlockReasonTrainingForbidden BlockReason = "training_forbidden"
	BlockReasonLicenseViolation  BlockReason = "license_violation"
	BlockReasonManualBlock       BlockReason = "manual_block"
)
|
|
|
|
// =============================================================================
|
|
// CORE MODELS
|
|
// =============================================================================
|
|
|
|
// SourcePolicy represents a versioned policy for data source management.
|
|
// Policies can be scoped to a specific Bundesland or apply federally (bundesland = nil).
|
|
type SourcePolicy struct {
|
|
ID uuid.UUID `json:"id" db:"id"`
|
|
Version int `json:"version" db:"version"`
|
|
Name string `json:"name" db:"name"`
|
|
Description *string `json:"description,omitempty" db:"description"`
|
|
Bundesland *Bundesland `json:"bundesland,omitempty" db:"bundesland"`
|
|
IsActive bool `json:"is_active" db:"is_active"`
|
|
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
|
ApprovedBy *uuid.UUID `json:"approved_by,omitempty" db:"approved_by"`
|
|
ApprovedAt *time.Time `json:"approved_at,omitempty" db:"approved_at"`
|
|
|
|
// Joined fields (populated by queries)
|
|
Sources []AllowedSource `json:"sources,omitempty"`
|
|
}
|
|
|
|
// AllowedSource represents a whitelisted data source with license information.
|
|
type AllowedSource struct {
|
|
ID uuid.UUID `json:"id" db:"id"`
|
|
PolicyID uuid.UUID `json:"policy_id" db:"policy_id"`
|
|
Domain string `json:"domain" db:"domain"`
|
|
Name string `json:"name" db:"name"`
|
|
Description *string `json:"description,omitempty" db:"description"`
|
|
License License `json:"license" db:"license"`
|
|
LegalBasis *string `json:"legal_basis,omitempty" db:"legal_basis"`
|
|
CitationTemplate *string `json:"citation_template,omitempty" db:"citation_template"`
|
|
TrustBoost float64 `json:"trust_boost" db:"trust_boost"`
|
|
IsActive bool `json:"is_active" db:"is_active"`
|
|
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
|
|
|
// Joined fields (populated by queries)
|
|
Operations []OperationPermission `json:"operations,omitempty"`
|
|
PolicyName *string `json:"policy_name,omitempty"`
|
|
}
|
|
|
|
// OperationPermission represents the permission matrix for a specific source.
|
|
type OperationPermission struct {
|
|
ID uuid.UUID `json:"id" db:"id"`
|
|
SourceID uuid.UUID `json:"source_id" db:"source_id"`
|
|
Operation Operation `json:"operation" db:"operation"`
|
|
IsAllowed bool `json:"is_allowed" db:"is_allowed"`
|
|
RequiresCitation bool `json:"requires_citation" db:"requires_citation"`
|
|
Notes *string `json:"notes,omitempty" db:"notes"`
|
|
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
|
}
|
|
|
|
// PIIRule represents a rule for detecting personally identifiable information.
|
|
type PIIRule struct {
|
|
ID uuid.UUID `json:"id" db:"id"`
|
|
Name string `json:"name" db:"name"`
|
|
Description *string `json:"description,omitempty" db:"description"`
|
|
RuleType PIIRuleType `json:"rule_type" db:"rule_type"`
|
|
Pattern string `json:"pattern" db:"pattern"`
|
|
Severity PIISeverity `json:"severity" db:"severity"`
|
|
IsActive bool `json:"is_active" db:"is_active"`
|
|
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
|
}
|
|
|
|
// =============================================================================
|
|
// AUDIT AND LOGGING MODELS
|
|
// =============================================================================
|
|
|
|
// PolicyAuditLog represents an immutable audit log entry for policy changes.
|
|
type PolicyAuditLog struct {
|
|
ID uuid.UUID `json:"id" db:"id"`
|
|
Action AuditAction `json:"action" db:"action"`
|
|
EntityType AuditEntityType `json:"entity_type" db:"entity_type"`
|
|
EntityID *uuid.UUID `json:"entity_id,omitempty" db:"entity_id"`
|
|
OldValue json.RawMessage `json:"old_value,omitempty" db:"old_value"`
|
|
NewValue json.RawMessage `json:"new_value,omitempty" db:"new_value"`
|
|
UserID *uuid.UUID `json:"user_id,omitempty" db:"user_id"`
|
|
UserEmail *string `json:"user_email,omitempty" db:"user_email"`
|
|
IPAddress *string `json:"ip_address,omitempty" db:"ip_address"`
|
|
UserAgent *string `json:"user_agent,omitempty" db:"user_agent"`
|
|
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
|
}
|
|
|
|
// BlockedContentLog represents a log entry for blocked URLs.
|
|
type BlockedContentLog struct {
|
|
ID uuid.UUID `json:"id" db:"id"`
|
|
URL string `json:"url" db:"url"`
|
|
Domain string `json:"domain" db:"domain"`
|
|
BlockReason BlockReason `json:"block_reason" db:"block_reason"`
|
|
MatchedRuleID *uuid.UUID `json:"matched_rule_id,omitempty" db:"matched_rule_id"`
|
|
Details json.RawMessage `json:"details,omitempty" db:"details"`
|
|
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
|
}
|
|
|
|
// =============================================================================
|
|
// REQUEST/RESPONSE MODELS
|
|
// =============================================================================
|
|
|
|
// CreateSourcePolicyRequest represents a request to create a new policy.
|
|
type CreateSourcePolicyRequest struct {
|
|
Name string `json:"name" binding:"required"`
|
|
Description *string `json:"description"`
|
|
Bundesland *Bundesland `json:"bundesland"`
|
|
}
|
|
|
|
// UpdateSourcePolicyRequest represents a request to update a policy.
|
|
type UpdateSourcePolicyRequest struct {
|
|
Name *string `json:"name"`
|
|
Description *string `json:"description"`
|
|
Bundesland *Bundesland `json:"bundesland"`
|
|
IsActive *bool `json:"is_active"`
|
|
}
|
|
|
|
// CreateAllowedSourceRequest represents a request to create a new allowed source.
|
|
type CreateAllowedSourceRequest struct {
|
|
PolicyID uuid.UUID `json:"policy_id" binding:"required"`
|
|
Domain string `json:"domain" binding:"required"`
|
|
Name string `json:"name" binding:"required"`
|
|
Description *string `json:"description"`
|
|
License License `json:"license" binding:"required"`
|
|
LegalBasis *string `json:"legal_basis"`
|
|
CitationTemplate *string `json:"citation_template"`
|
|
TrustBoost *float64 `json:"trust_boost"`
|
|
}
|
|
|
|
// UpdateAllowedSourceRequest represents a request to update an allowed source.
|
|
type UpdateAllowedSourceRequest struct {
|
|
Domain *string `json:"domain"`
|
|
Name *string `json:"name"`
|
|
Description *string `json:"description"`
|
|
License *License `json:"license"`
|
|
LegalBasis *string `json:"legal_basis"`
|
|
CitationTemplate *string `json:"citation_template"`
|
|
TrustBoost *float64 `json:"trust_boost"`
|
|
IsActive *bool `json:"is_active"`
|
|
}
|
|
|
|
// UpdateOperationPermissionRequest is the payload for updating operation
// permissions.
//
// Every field is a pointer, so an absent JSON key decodes to nil.
// NOTE(review): presumably nil means "leave unchanged"; confirm against the
// handler that applies the request.
type UpdateOperationPermissionRequest struct {
	IsAllowed        *bool   `json:"is_allowed"`
	RequiresCitation *bool   `json:"requires_citation"`
	Notes            *string `json:"notes"`
}
|
|
|
|
// CreatePIIRuleRequest represents a request to create a new PII rule.
|
|
type CreatePIIRuleRequest struct {
|
|
Name string `json:"name" binding:"required"`
|
|
Description *string `json:"description"`
|
|
RuleType PIIRuleType `json:"rule_type" binding:"required"`
|
|
Pattern string `json:"pattern" binding:"required"`
|
|
Severity PIISeverity `json:"severity"`
|
|
}
|
|
|
|
// UpdatePIIRuleRequest represents a request to update a PII rule.
|
|
type UpdatePIIRuleRequest struct {
|
|
Name *string `json:"name"`
|
|
Description *string `json:"description"`
|
|
RuleType *PIIRuleType `json:"rule_type"`
|
|
Pattern *string `json:"pattern"`
|
|
Severity *PIISeverity `json:"severity"`
|
|
IsActive *bool `json:"is_active"`
|
|
}
|
|
|
|
// CheckComplianceRequest represents a request to check URL compliance.
|
|
type CheckComplianceRequest struct {
|
|
URL string `json:"url" binding:"required"`
|
|
Operation Operation `json:"operation" binding:"required"`
|
|
Bundesland *Bundesland `json:"bundesland"`
|
|
}
|
|
|
|
// CheckComplianceResponse represents the compliance check result.
|
|
type CheckComplianceResponse struct {
|
|
IsAllowed bool `json:"is_allowed"`
|
|
Source *AllowedSource `json:"source,omitempty"`
|
|
BlockReason *BlockReason `json:"block_reason,omitempty"`
|
|
RequiresCitation bool `json:"requires_citation"`
|
|
CitationTemplate *string `json:"citation_template,omitempty"`
|
|
License *License `json:"license,omitempty"`
|
|
}
|
|
|
|
// PIITestRequest is the payload for testing PII detection against a piece of
// text. Text is tagged binding:"required".
type PIITestRequest struct {
	Text string `json:"text" binding:"required"`
}
|
|
|
|
// PIIMatch represents a single PII match in text.
|
|
type PIIMatch struct {
|
|
RuleID uuid.UUID `json:"rule_id"`
|
|
RuleName string `json:"rule_name"`
|
|
RuleType PIIRuleType `json:"rule_type"`
|
|
Severity PIISeverity `json:"severity"`
|
|
Match string `json:"match"`
|
|
StartIndex int `json:"start_index"`
|
|
EndIndex int `json:"end_index"`
|
|
}
|
|
|
|
// PIITestResponse represents the result of PII detection test.
|
|
type PIITestResponse struct {
|
|
HasPII bool `json:"has_pii"`
|
|
Matches []PIIMatch `json:"matches"`
|
|
BlockLevel PIISeverity `json:"block_level"`
|
|
ShouldBlock bool `json:"should_block"`
|
|
}
|
|
|
|
// =============================================================================
|
|
// LIST/FILTER MODELS
|
|
// =============================================================================
|
|
|
|
// PolicyListFilter represents filters for listing policies.
|
|
type PolicyListFilter struct {
|
|
Bundesland *Bundesland `form:"bundesland"`
|
|
IsActive *bool `form:"is_active"`
|
|
Limit int `form:"limit"`
|
|
Offset int `form:"offset"`
|
|
}
|
|
|
|
// SourceListFilter represents filters for listing sources.
|
|
type SourceListFilter struct {
|
|
PolicyID *uuid.UUID `form:"policy_id"`
|
|
Domain *string `form:"domain"`
|
|
License *License `form:"license"`
|
|
IsActive *bool `form:"is_active"`
|
|
Limit int `form:"limit"`
|
|
Offset int `form:"offset"`
|
|
}
|
|
|
|
// AuditLogFilter represents filters for querying audit logs.
|
|
type AuditLogFilter struct {
|
|
EntityType *AuditEntityType `form:"entity_type"`
|
|
EntityID *uuid.UUID `form:"entity_id"`
|
|
Action *AuditAction `form:"action"`
|
|
UserEmail *string `form:"user_email"`
|
|
FromDate *time.Time `form:"from"`
|
|
ToDate *time.Time `form:"to"`
|
|
Limit int `form:"limit"`
|
|
Offset int `form:"offset"`
|
|
}
|
|
|
|
// BlockedContentFilter represents filters for querying blocked content logs.
|
|
type BlockedContentFilter struct {
|
|
Domain *string `form:"domain"`
|
|
BlockReason *BlockReason `form:"block_reason"`
|
|
FromDate *time.Time `form:"from"`
|
|
ToDate *time.Time `form:"to"`
|
|
Limit int `form:"limit"`
|
|
Offset int `form:"offset"`
|
|
}
|
|
|
|
// =============================================================================
|
|
// STATISTICS MODELS
|
|
// =============================================================================
|
|
|
|
// PolicyStats holds aggregated statistics for the policy system.
//
// SourcesByLicense and BlocksByReason are keyed by plain strings.
// NOTE(review): presumably the keys are License and BlockReason values, and
// the scale of ComplianceScore is not visible here; confirm where the stats
// are computed.
type PolicyStats struct {
	ActivePolicies   int            `json:"active_policies"`
	TotalSources     int            `json:"total_sources"`
	ActiveSources    int            `json:"active_sources"`
	BlockedToday     int            `json:"blocked_today"`
	BlockedTotal     int            `json:"blocked_total"`
	PIIRulesActive   int            `json:"pii_rules_active"`
	SourcesByLicense map[string]int `json:"sources_by_license"`
	BlocksByReason   map[string]int `json:"blocks_by_reason"`
	ComplianceScore  float64        `json:"compliance_score"`
}
|
|
|
|
// =============================================================================
|
|
// YAML CONFIGURATION MODELS
|
|
// =============================================================================
|
|
|
|
// BundeslaenderConfig represents the YAML configuration for initial data loading.
|
|
type BundeslaenderConfig struct {
|
|
Federal PolicyConfig `yaml:"federal"`
|
|
Bundeslaender map[string]PolicyConfig `yaml:",inline"`
|
|
DefaultOperations OperationsConfig `yaml:"default_operations"`
|
|
PIIRules []PIIRuleConfig `yaml:"pii_rules"`
|
|
}
|
|
|
|
// PolicyConfig represents a policy configuration in YAML.
|
|
type PolicyConfig struct {
|
|
Name string `yaml:"name"`
|
|
Sources []SourceConfig `yaml:"sources"`
|
|
}
|
|
|
|
// SourceConfig is a source entry in the YAML configuration.
//
// All fields are plain strings or floats; License is not typed as License
// here. LegalBasis, CitationTemplate and TrustBoost are tagged omitempty.
type SourceConfig struct {
	Domain           string  `yaml:"domain"`
	Name             string  `yaml:"name"`
	License          string  `yaml:"license"`
	LegalBasis       string  `yaml:"legal_basis,omitempty"`
	CitationTemplate string  `yaml:"citation_template,omitempty"`
	TrustBoost       float64 `yaml:"trust_boost,omitempty"`
}
|
|
|
|
// OperationsConfig represents default operation permissions in YAML.
|
|
type OperationsConfig struct {
|
|
Lookup OperationConfig `yaml:"lookup"`
|
|
RAG OperationConfig `yaml:"rag"`
|
|
Training OperationConfig `yaml:"training"`
|
|
Export OperationConfig `yaml:"export"`
|
|
}
|
|
|
|
// OperationConfig is a single operation permission in the YAML configuration:
// whether the operation is allowed and whether it requires a citation.
type OperationConfig struct {
	Allowed          bool `yaml:"allowed"`
	RequiresCitation bool `yaml:"requires_citation"`
}
|
|
|
|
// PIIRuleConfig is a PII rule entry in the YAML configuration.
//
// Type and Severity are plain strings here.
// NOTE(review): presumably they are converted to PIIRuleType and PIISeverity
// by the loader; confirm where the config is consumed.
type PIIRuleConfig struct {
	Name     string `yaml:"name"`
	Type     string `yaml:"type"`
	Pattern  string `yaml:"pattern"`
	Severity string `yaml:"severity"`
}
|