feat: edu-search-service migriert, voice-service/geo-service entfernt
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
255
edu-search-service/internal/policy/audit.go
Normal file
255
edu-search-service/internal/policy/audit.go
Normal file
@@ -0,0 +1,255 @@
|
||||
package policy
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"time"

	"github.com/google/uuid"
)
|
||||
|
||||
// Auditor provides audit logging functionality for the policy system.
|
||||
type Auditor struct {
|
||||
store *Store
|
||||
}
|
||||
|
||||
// NewAuditor creates a new Auditor instance.
|
||||
func NewAuditor(store *Store) *Auditor {
|
||||
return &Auditor{store: store}
|
||||
}
|
||||
|
||||
// LogChange logs a policy change to the audit trail.
|
||||
func (a *Auditor) LogChange(ctx context.Context, action AuditAction, entityType AuditEntityType, entityID *uuid.UUID, oldValue, newValue interface{}, userEmail, ipAddress, userAgent *string) error {
|
||||
entry := &PolicyAuditLog{
|
||||
Action: action,
|
||||
EntityType: entityType,
|
||||
EntityID: entityID,
|
||||
UserEmail: userEmail,
|
||||
IPAddress: ipAddress,
|
||||
UserAgent: userAgent,
|
||||
}
|
||||
|
||||
if oldValue != nil {
|
||||
entry.OldValue = toJSON(oldValue)
|
||||
}
|
||||
if newValue != nil {
|
||||
entry.NewValue = toJSON(newValue)
|
||||
}
|
||||
|
||||
return a.store.CreateAuditLog(ctx, entry)
|
||||
}
|
||||
|
||||
// LogBlocked logs a blocked URL to the blocked content log.
|
||||
func (a *Auditor) LogBlocked(ctx context.Context, url, domain string, reason BlockReason, ruleID *uuid.UUID, details map[string]interface{}) error {
|
||||
entry := &BlockedContentLog{
|
||||
URL: url,
|
||||
Domain: domain,
|
||||
BlockReason: reason,
|
||||
MatchedRuleID: ruleID,
|
||||
}
|
||||
|
||||
if details != nil {
|
||||
entry.Details = toJSON(details)
|
||||
}
|
||||
|
||||
return a.store.CreateBlockedContentLog(ctx, entry)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// CONVENIENCE METHODS
|
||||
// =============================================================================
|
||||
|
||||
// LogPolicyCreated logs a policy creation event.
|
||||
func (a *Auditor) LogPolicyCreated(ctx context.Context, policy *SourcePolicy, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionCreate, AuditEntitySourcePolicy, &policy.ID, nil, policy, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogPolicyUpdated logs a policy update event.
|
||||
func (a *Auditor) LogPolicyUpdated(ctx context.Context, oldPolicy, newPolicy *SourcePolicy, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionUpdate, AuditEntitySourcePolicy, &newPolicy.ID, oldPolicy, newPolicy, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogPolicyDeleted logs a policy deletion event.
|
||||
func (a *Auditor) LogPolicyDeleted(ctx context.Context, policy *SourcePolicy, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionDelete, AuditEntitySourcePolicy, &policy.ID, policy, nil, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogPolicyActivated logs a policy activation event.
|
||||
func (a *Auditor) LogPolicyActivated(ctx context.Context, policy *SourcePolicy, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionActivate, AuditEntitySourcePolicy, &policy.ID, nil, policy, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogPolicyDeactivated logs a policy deactivation event.
|
||||
func (a *Auditor) LogPolicyDeactivated(ctx context.Context, policy *SourcePolicy, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionDeactivate, AuditEntitySourcePolicy, &policy.ID, policy, nil, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogSourceCreated logs a source creation event.
|
||||
func (a *Auditor) LogSourceCreated(ctx context.Context, source *AllowedSource, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionCreate, AuditEntityAllowedSource, &source.ID, nil, source, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogSourceUpdated logs a source update event.
|
||||
func (a *Auditor) LogSourceUpdated(ctx context.Context, oldSource, newSource *AllowedSource, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionUpdate, AuditEntityAllowedSource, &newSource.ID, oldSource, newSource, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogSourceDeleted logs a source deletion event.
|
||||
func (a *Auditor) LogSourceDeleted(ctx context.Context, source *AllowedSource, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionDelete, AuditEntityAllowedSource, &source.ID, source, nil, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogOperationUpdated logs an operation permission update event.
|
||||
func (a *Auditor) LogOperationUpdated(ctx context.Context, oldOp, newOp *OperationPermission, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionUpdate, AuditEntityOperationPermission, &newOp.ID, oldOp, newOp, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogPIIRuleCreated logs a PII rule creation event.
|
||||
func (a *Auditor) LogPIIRuleCreated(ctx context.Context, rule *PIIRule, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionCreate, AuditEntityPIIRule, &rule.ID, nil, rule, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogPIIRuleUpdated logs a PII rule update event.
|
||||
func (a *Auditor) LogPIIRuleUpdated(ctx context.Context, oldRule, newRule *PIIRule, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionUpdate, AuditEntityPIIRule, &newRule.ID, oldRule, newRule, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogPIIRuleDeleted logs a PII rule deletion event.
|
||||
func (a *Auditor) LogPIIRuleDeleted(ctx context.Context, rule *PIIRule, userEmail *string) error {
|
||||
return a.LogChange(ctx, AuditActionDelete, AuditEntityPIIRule, &rule.ID, rule, nil, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// LogContentBlocked logs a blocked content event with details.
|
||||
func (a *Auditor) LogContentBlocked(ctx context.Context, url, domain string, reason BlockReason, matchedPatterns []string, ruleID *uuid.UUID) error {
|
||||
details := map[string]interface{}{
|
||||
"matched_patterns": matchedPatterns,
|
||||
}
|
||||
return a.LogBlocked(ctx, url, domain, reason, ruleID, details)
|
||||
}
|
||||
|
||||
// LogPIIBlocked logs content blocked due to PII detection.
|
||||
func (a *Auditor) LogPIIBlocked(ctx context.Context, url, domain string, matches []PIIMatch) error {
|
||||
matchDetails := make([]map[string]interface{}, len(matches))
|
||||
var ruleID *uuid.UUID
|
||||
|
||||
for i, m := range matches {
|
||||
matchDetails[i] = map[string]interface{}{
|
||||
"rule_name": m.RuleName,
|
||||
"severity": m.Severity,
|
||||
"match": maskPII(m.Match), // Mask the actual PII in logs
|
||||
}
|
||||
if ruleID == nil {
|
||||
ruleID = &m.RuleID
|
||||
}
|
||||
}
|
||||
|
||||
details := map[string]interface{}{
|
||||
"pii_matches": matchDetails,
|
||||
"match_count": len(matches),
|
||||
}
|
||||
|
||||
return a.LogBlocked(ctx, url, domain, BlockReasonPIIDetected, ruleID, details)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// HELPERS
|
||||
// =============================================================================
|
||||
|
||||
// toJSON encodes v with json.Marshal and returns the raw bytes.
// If v cannot be encoded the error is dropped and nil is returned.
func toJSON(v interface{}) json.RawMessage {
	if encoded, err := json.Marshal(v); err == nil {
		return encoded
	}
	return nil
}
|
||||
|
||||
// maskPII returns a redacted form of pii that is safe to write to logs.
//
// Values of at most four characters are replaced entirely by "****". Longer
// values keep their first two and last two characters with "****" in between.
// Lengths and cut points are measured in runes, not bytes, so a multi-byte
// UTF-8 character (e.g. "ä" or "ß") is never split into invalid bytes.
func maskPII(pii string) string {
	runes := []rune(pii)
	if len(runes) <= 4 {
		return "****"
	}
	return string(runes[:2]) + "****" + string(runes[len(runes)-2:])
}
|
||||
|
||||
// =============================================================================
|
||||
// AUDIT REPORT GENERATION
|
||||
// =============================================================================
|
||||
|
||||
// AuditReport represents an audit report for compliance.
|
||||
type AuditReport struct {
|
||||
GeneratedAt string `json:"generated_at"`
|
||||
PeriodStart string `json:"period_start"`
|
||||
PeriodEnd string `json:"period_end"`
|
||||
Summary AuditReportSummary `json:"summary"`
|
||||
PolicyChanges []PolicyAuditLog `json:"policy_changes"`
|
||||
BlockedContent []BlockedContentLog `json:"blocked_content"`
|
||||
Stats *PolicyStats `json:"stats"`
|
||||
}
|
||||
|
||||
// AuditReportSummary holds the aggregate counters of an AuditReport.
type AuditReportSummary struct {
	// TotalPolicyChanges is the number of audit-log entries in the report.
	TotalPolicyChanges int `json:"total_policy_changes"`
	// TotalBlocked is the number of blocked-content entries in the report.
	TotalBlocked int `json:"total_blocked"`
	// ChangesByAction counts audit-log entries per action name.
	ChangesByAction map[string]int `json:"changes_by_action"`
	// BlocksByReason counts blocked-content entries per block reason.
	BlocksByReason map[string]int `json:"blocks_by_reason"`
}
|
||||
|
||||
// GenerateAuditReport generates a compliance audit report.
|
||||
func (a *Auditor) GenerateAuditReport(ctx context.Context, filter *AuditLogFilter, blockedFilter *BlockedContentFilter) (*AuditReport, error) {
|
||||
// Get audit logs
|
||||
auditLogs, _, err := a.store.ListAuditLogs(ctx, filter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get blocked content
|
||||
blockedLogs, _, err := a.store.ListBlockedContent(ctx, blockedFilter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get stats
|
||||
stats, err := a.store.GetStats(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Build summary
|
||||
summary := AuditReportSummary{
|
||||
TotalPolicyChanges: len(auditLogs),
|
||||
TotalBlocked: len(blockedLogs),
|
||||
ChangesByAction: make(map[string]int),
|
||||
BlocksByReason: make(map[string]int),
|
||||
}
|
||||
|
||||
for _, log := range auditLogs {
|
||||
summary.ChangesByAction[string(log.Action)]++
|
||||
}
|
||||
|
||||
for _, log := range blockedLogs {
|
||||
summary.BlocksByReason[string(log.BlockReason)]++
|
||||
}
|
||||
|
||||
// Build report
|
||||
periodStart := ""
|
||||
periodEnd := ""
|
||||
if filter.FromDate != nil {
|
||||
periodStart = filter.FromDate.Format("2006-01-02")
|
||||
}
|
||||
if filter.ToDate != nil {
|
||||
periodEnd = filter.ToDate.Format("2006-01-02")
|
||||
}
|
||||
|
||||
report := &AuditReport{
|
||||
GeneratedAt: uuid.New().String()[:19], // Timestamp placeholder
|
||||
PeriodStart: periodStart,
|
||||
PeriodEnd: periodEnd,
|
||||
Summary: summary,
|
||||
PolicyChanges: auditLogs,
|
||||
BlockedContent: blockedLogs,
|
||||
Stats: stats,
|
||||
}
|
||||
|
||||
return report, nil
|
||||
}
|
||||
281
edu-search-service/internal/policy/enforcer.go
Normal file
281
edu-search-service/internal/policy/enforcer.go
Normal file
@@ -0,0 +1,281 @@
|
||||
package policy
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// Enforcer provides policy enforcement for the crawler and pipeline.
|
||||
type Enforcer struct {
|
||||
store *Store
|
||||
piiDetector *PIIDetector
|
||||
auditor *Auditor
|
||||
}
|
||||
|
||||
// NewEnforcer creates a new Enforcer instance.
|
||||
func NewEnforcer(store *Store) *Enforcer {
|
||||
return &Enforcer{
|
||||
store: store,
|
||||
piiDetector: NewPIIDetector(store),
|
||||
auditor: NewAuditor(store),
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// SOURCE CHECKING
|
||||
// =============================================================================
|
||||
|
||||
// CheckSource verifies if a URL is allowed based on the whitelist.
|
||||
// Returns the AllowedSource if found, nil if not whitelisted.
|
||||
func (e *Enforcer) CheckSource(ctx context.Context, rawURL string, bundesland *Bundesland) (*AllowedSource, error) {
|
||||
domain, err := extractDomain(rawURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
source, err := e.store.GetSourceByDomain(ctx, domain, bundesland)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return source, nil
|
||||
}
|
||||
|
||||
// CheckOperation verifies if a specific operation is allowed for a source.
|
||||
func (e *Enforcer) CheckOperation(ctx context.Context, source *AllowedSource, operation Operation) (*OperationPermission, error) {
|
||||
for _, op := range source.Operations {
|
||||
if op.Operation == operation {
|
||||
return &op, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If not found in loaded operations, query directly
|
||||
ops, err := e.store.GetOperationsBySourceID(ctx, source.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, op := range ops {
|
||||
if op.Operation == operation {
|
||||
return &op, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// CheckCompliance performs a full compliance check for a URL and operation.
|
||||
func (e *Enforcer) CheckCompliance(ctx context.Context, req *CheckComplianceRequest) (*CheckComplianceResponse, error) {
|
||||
response := &CheckComplianceResponse{
|
||||
IsAllowed: false,
|
||||
RequiresCitation: false,
|
||||
}
|
||||
|
||||
// Check if source is whitelisted
|
||||
source, err := e.CheckSource(ctx, req.URL, req.Bundesland)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if source == nil {
|
||||
reason := BlockReasonNotWhitelisted
|
||||
response.BlockReason = &reason
|
||||
return response, nil
|
||||
}
|
||||
|
||||
response.Source = source
|
||||
response.License = &source.License
|
||||
response.CitationTemplate = source.CitationTemplate
|
||||
|
||||
// Check operation permission
|
||||
opPerm, err := e.CheckOperation(ctx, source, req.Operation)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if opPerm == nil || !opPerm.IsAllowed {
|
||||
var reason BlockReason
|
||||
if req.Operation == OperationTraining {
|
||||
reason = BlockReasonTrainingForbidden
|
||||
} else {
|
||||
reason = BlockReasonLicenseViolation
|
||||
}
|
||||
response.BlockReason = &reason
|
||||
return response, nil
|
||||
}
|
||||
|
||||
response.IsAllowed = true
|
||||
response.RequiresCitation = opPerm.RequiresCitation
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// PII CHECKING
|
||||
// =============================================================================
|
||||
|
||||
// DetectPII scans text for PII patterns and returns matches.
|
||||
func (e *Enforcer) DetectPII(ctx context.Context, text string) (*PIITestResponse, error) {
|
||||
return e.piiDetector.Detect(ctx, text)
|
||||
}
|
||||
|
||||
// ShouldBlockForPII determines if content should be blocked based on PII matches.
|
||||
func (e *Enforcer) ShouldBlockForPII(response *PIITestResponse) bool {
|
||||
if response == nil {
|
||||
return false
|
||||
}
|
||||
return response.ShouldBlock
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// LOGGING
|
||||
// =============================================================================
|
||||
|
||||
// LogBlocked logs a blocked URL to the blocked content log.
|
||||
func (e *Enforcer) LogBlocked(ctx context.Context, rawURL string, reason BlockReason, ruleID *uuid.UUID, details map[string]interface{}) error {
|
||||
domain, _ := extractDomain(rawURL)
|
||||
return e.auditor.LogBlocked(ctx, rawURL, domain, reason, ruleID, details)
|
||||
}
|
||||
|
||||
// LogChange logs a policy change to the audit log.
|
||||
func (e *Enforcer) LogChange(ctx context.Context, action AuditAction, entityType AuditEntityType, entityID *uuid.UUID, oldValue, newValue interface{}, userEmail *string) error {
|
||||
return e.auditor.LogChange(ctx, action, entityType, entityID, oldValue, newValue, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// BATCH OPERATIONS
|
||||
// =============================================================================
|
||||
|
||||
// FilterURLs filters a list of URLs, returning only whitelisted ones.
|
||||
func (e *Enforcer) FilterURLs(ctx context.Context, urls []string, bundesland *Bundesland, operation Operation) ([]FilteredURL, error) {
|
||||
results := make([]FilteredURL, 0, len(urls))
|
||||
|
||||
for _, u := range urls {
|
||||
result := FilteredURL{
|
||||
URL: u,
|
||||
IsAllowed: false,
|
||||
}
|
||||
|
||||
source, err := e.CheckSource(ctx, u, bundesland)
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
if source == nil {
|
||||
result.BlockReason = BlockReasonNotWhitelisted
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
opPerm, err := e.CheckOperation(ctx, source, operation)
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
if opPerm == nil || !opPerm.IsAllowed {
|
||||
if operation == OperationTraining {
|
||||
result.BlockReason = BlockReasonTrainingForbidden
|
||||
} else {
|
||||
result.BlockReason = BlockReasonLicenseViolation
|
||||
}
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
result.IsAllowed = true
|
||||
result.Source = source
|
||||
result.RequiresCitation = opPerm.RequiresCitation
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// FilteredURL represents the result of filtering a single URL.
|
||||
type FilteredURL struct {
|
||||
URL string `json:"url"`
|
||||
IsAllowed bool `json:"is_allowed"`
|
||||
Source *AllowedSource `json:"source,omitempty"`
|
||||
BlockReason BlockReason `json:"block_reason,omitempty"`
|
||||
RequiresCitation bool `json:"requires_citation"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// HELPERS
|
||||
// =============================================================================
|
||||
|
||||
// extractDomain returns the normalised host name of rawURL: lower-cased,
// without port and without a leading "www.".
//
// Input without a scheme ("kmk.org/path") is parsed as if it started with
// "https://". A "://" that only occurs after the first '/', '?' or '#' — for
// example inside a query parameter — does not count as a scheme. Host names
// are case-insensitive, so the result is lower-cased to make whitelist
// comparisons independent of how the URL was typed.
//
// An error is returned only when url.Parse rejects the input; the returned
// domain is then empty.
func extractDomain(rawURL string) (string, error) {
	// Only treat "://" as a scheme separator when nothing path-like precedes it.
	if i := strings.Index(rawURL, "://"); i < 0 || strings.ContainsAny(rawURL[:i], "/?#") {
		rawURL = "https://" + rawURL
	}

	parsed, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}

	host := strings.ToLower(parsed.Hostname())
	return strings.TrimPrefix(host, "www."), nil
}
|
||||
|
||||
// IsTrainingAllowed checks if training is allowed for any source (should always be false).
|
||||
func (e *Enforcer) IsTrainingAllowed(ctx context.Context) (bool, error) {
|
||||
// Training should NEVER be allowed - this is a safeguard
|
||||
matrix, err := e.store.GetOperationsMatrix(ctx)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, source := range matrix {
|
||||
for _, op := range source.Operations {
|
||||
if op.Operation == OperationTraining && op.IsAllowed {
|
||||
// This should never happen - log a warning
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// GetSourceByURL is a convenience method to get a source by URL.
|
||||
func (e *Enforcer) GetSourceByURL(ctx context.Context, rawURL string, bundesland *Bundesland) (*AllowedSource, error) {
|
||||
return e.CheckSource(ctx, rawURL, bundesland)
|
||||
}
|
||||
|
||||
// GetCitationForURL generates a citation for a URL if required.
|
||||
func (e *Enforcer) GetCitationForURL(ctx context.Context, rawURL string, bundesland *Bundesland, title string, date string) (string, error) {
|
||||
source, err := e.CheckSource(ctx, rawURL, bundesland)
|
||||
if err != nil || source == nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if source.CitationTemplate == nil || *source.CitationTemplate == "" {
|
||||
// Default citation format
|
||||
return "Quelle: " + source.Name + ", " + title + ", " + date, nil
|
||||
}
|
||||
|
||||
// Replace placeholders in template
|
||||
citation := *source.CitationTemplate
|
||||
citation = strings.ReplaceAll(citation, "{title}", title)
|
||||
citation = strings.ReplaceAll(citation, "{date}", date)
|
||||
citation = strings.ReplaceAll(citation, "{url}", rawURL)
|
||||
citation = strings.ReplaceAll(citation, "{domain}", source.Domain)
|
||||
citation = strings.ReplaceAll(citation, "{source}", source.Name)
|
||||
|
||||
return citation, nil
|
||||
}
|
||||
255
edu-search-service/internal/policy/loader.go
Normal file
255
edu-search-service/internal/policy/loader.go
Normal file
@@ -0,0 +1,255 @@
|
||||
package policy
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Loader handles loading policy configuration from YAML files.
|
||||
type Loader struct {
|
||||
store *Store
|
||||
}
|
||||
|
||||
// NewLoader creates a new Loader instance.
|
||||
func NewLoader(store *Store) *Loader {
|
||||
return &Loader{store: store}
|
||||
}
|
||||
|
||||
// LoadFromFile loads policy configuration from a YAML file.
|
||||
func (l *Loader) LoadFromFile(ctx context.Context, path string) error {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read YAML file: %w", err)
|
||||
}
|
||||
|
||||
config, err := ParseYAML(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse YAML: %w", err)
|
||||
}
|
||||
|
||||
return l.store.LoadFromYAML(ctx, config)
|
||||
}
|
||||
|
||||
// ParseYAML parses YAML configuration data.
|
||||
func ParseYAML(data []byte) (*BundeslaenderConfig, error) {
|
||||
// First, parse as a generic map to handle the inline Bundeslaender
|
||||
var rawConfig map[string]interface{}
|
||||
if err := yaml.Unmarshal(data, &rawConfig); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse YAML: %w", err)
|
||||
}
|
||||
|
||||
config := &BundeslaenderConfig{
|
||||
Bundeslaender: make(map[string]PolicyConfig),
|
||||
}
|
||||
|
||||
// Parse federal
|
||||
if federal, ok := rawConfig["federal"]; ok {
|
||||
if federalMap, ok := federal.(map[string]interface{}); ok {
|
||||
config.Federal = parsePolicyConfig(federalMap)
|
||||
}
|
||||
}
|
||||
|
||||
// Parse default_operations
|
||||
if ops, ok := rawConfig["default_operations"]; ok {
|
||||
if opsMap, ok := ops.(map[string]interface{}); ok {
|
||||
config.DefaultOperations = parseOperationsConfig(opsMap)
|
||||
}
|
||||
}
|
||||
|
||||
// Parse pii_rules
|
||||
if rules, ok := rawConfig["pii_rules"]; ok {
|
||||
if rulesSlice, ok := rules.([]interface{}); ok {
|
||||
for _, rule := range rulesSlice {
|
||||
if ruleMap, ok := rule.(map[string]interface{}); ok {
|
||||
config.PIIRules = append(config.PIIRules, parsePIIRuleConfig(ruleMap))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parse Bundeslaender (2-letter codes)
|
||||
bundeslaender := []string{"BW", "BY", "BE", "BB", "HB", "HH", "HE", "MV", "NI", "NW", "RP", "SL", "SN", "ST", "SH", "TH"}
|
||||
for _, bl := range bundeslaender {
|
||||
if blConfig, ok := rawConfig[bl]; ok {
|
||||
if blMap, ok := blConfig.(map[string]interface{}); ok {
|
||||
config.Bundeslaender[bl] = parsePolicyConfig(blMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return config, nil
|
||||
}
|
||||
|
||||
func parsePolicyConfig(m map[string]interface{}) PolicyConfig {
|
||||
pc := PolicyConfig{}
|
||||
|
||||
if name, ok := m["name"].(string); ok {
|
||||
pc.Name = name
|
||||
}
|
||||
|
||||
if sources, ok := m["sources"].([]interface{}); ok {
|
||||
for _, src := range sources {
|
||||
if srcMap, ok := src.(map[string]interface{}); ok {
|
||||
pc.Sources = append(pc.Sources, parseSourceConfig(srcMap))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return pc
|
||||
}
|
||||
|
||||
func parseSourceConfig(m map[string]interface{}) SourceConfig {
|
||||
sc := SourceConfig{
|
||||
TrustBoost: 0.5, // Default
|
||||
}
|
||||
|
||||
if domain, ok := m["domain"].(string); ok {
|
||||
sc.Domain = domain
|
||||
}
|
||||
if name, ok := m["name"].(string); ok {
|
||||
sc.Name = name
|
||||
}
|
||||
if license, ok := m["license"].(string); ok {
|
||||
sc.License = license
|
||||
}
|
||||
if legalBasis, ok := m["legal_basis"].(string); ok {
|
||||
sc.LegalBasis = legalBasis
|
||||
}
|
||||
if citation, ok := m["citation_template"].(string); ok {
|
||||
sc.CitationTemplate = citation
|
||||
}
|
||||
if trustBoost, ok := m["trust_boost"].(float64); ok {
|
||||
sc.TrustBoost = trustBoost
|
||||
}
|
||||
|
||||
return sc
|
||||
}
|
||||
|
||||
func parseOperationsConfig(m map[string]interface{}) OperationsConfig {
|
||||
oc := OperationsConfig{}
|
||||
|
||||
if lookup, ok := m["lookup"].(map[string]interface{}); ok {
|
||||
oc.Lookup = parseOperationConfig(lookup)
|
||||
}
|
||||
if rag, ok := m["rag"].(map[string]interface{}); ok {
|
||||
oc.RAG = parseOperationConfig(rag)
|
||||
}
|
||||
if training, ok := m["training"].(map[string]interface{}); ok {
|
||||
oc.Training = parseOperationConfig(training)
|
||||
}
|
||||
if export, ok := m["export"].(map[string]interface{}); ok {
|
||||
oc.Export = parseOperationConfig(export)
|
||||
}
|
||||
|
||||
return oc
|
||||
}
|
||||
|
||||
func parseOperationConfig(m map[string]interface{}) OperationConfig {
|
||||
oc := OperationConfig{}
|
||||
|
||||
if allowed, ok := m["allowed"].(bool); ok {
|
||||
oc.Allowed = allowed
|
||||
}
|
||||
if requiresCitation, ok := m["requires_citation"].(bool); ok {
|
||||
oc.RequiresCitation = requiresCitation
|
||||
}
|
||||
|
||||
return oc
|
||||
}
|
||||
|
||||
func parsePIIRuleConfig(m map[string]interface{}) PIIRuleConfig {
|
||||
rc := PIIRuleConfig{
|
||||
Severity: "block", // Default
|
||||
}
|
||||
|
||||
if name, ok := m["name"].(string); ok {
|
||||
rc.Name = name
|
||||
}
|
||||
if ruleType, ok := m["type"].(string); ok {
|
||||
rc.Type = ruleType
|
||||
}
|
||||
if pattern, ok := m["pattern"].(string); ok {
|
||||
rc.Pattern = pattern
|
||||
}
|
||||
if severity, ok := m["severity"].(string); ok {
|
||||
rc.Severity = severity
|
||||
}
|
||||
|
||||
return rc
|
||||
}
|
||||
|
||||
// LoadDefaults loads a minimal set of default data (for testing or when no YAML exists).
|
||||
func (l *Loader) LoadDefaults(ctx context.Context) error {
|
||||
// Create federal policy with KMK
|
||||
federalPolicy, err := l.store.CreatePolicy(ctx, &CreateSourcePolicyRequest{
|
||||
Name: "KMK & Bundesebene",
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create federal policy: %w", err)
|
||||
}
|
||||
|
||||
trustBoost := 0.95
|
||||
legalBasis := "Amtliche Werke (§5 UrhG)"
|
||||
citation := "Quelle: KMK, {title}, {date}"
|
||||
|
||||
_, err = l.store.CreateSource(ctx, &CreateAllowedSourceRequest{
|
||||
PolicyID: federalPolicy.ID,
|
||||
Domain: "kmk.org",
|
||||
Name: "Kultusministerkonferenz",
|
||||
License: LicenseParagraph5,
|
||||
LegalBasis: &legalBasis,
|
||||
CitationTemplate: &citation,
|
||||
TrustBoost: &trustBoost,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create KMK source: %w", err)
|
||||
}
|
||||
|
||||
// Create default PII rules
|
||||
defaultRules := DefaultPIIRules()
|
||||
for _, rule := range defaultRules {
|
||||
_, err := l.store.CreatePIIRule(ctx, &CreatePIIRuleRequest{
|
||||
Name: rule.Name,
|
||||
RuleType: PIIRuleType(rule.Type),
|
||||
Pattern: rule.Pattern,
|
||||
Severity: PIISeverity(rule.Severity),
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create PII rule %s: %w", rule.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// HasData checks if the policy tables already have data.
|
||||
func (l *Loader) HasData(ctx context.Context) (bool, error) {
|
||||
policies, _, err := l.store.ListPolicies(ctx, &PolicyListFilter{Limit: 1})
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return len(policies) > 0, nil
|
||||
}
|
||||
|
||||
// LoadIfEmpty loads data from YAML only if tables are empty.
|
||||
func (l *Loader) LoadIfEmpty(ctx context.Context, path string) error {
|
||||
hasData, err := l.HasData(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if hasData {
|
||||
return nil // Already has data, skip loading
|
||||
}
|
||||
|
||||
// Check if file exists
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
// File doesn't exist, load defaults
|
||||
return l.LoadDefaults(ctx)
|
||||
}
|
||||
|
||||
return l.LoadFromFile(ctx, path)
|
||||
}
|
||||
445
edu-search-service/internal/policy/models.go
Normal file
445
edu-search-service/internal/policy/models.go
Normal file
@@ -0,0 +1,445 @@
|
||||
// Package policy provides whitelist-based data source management for the edu-search-service.
|
||||
// It implements source policies, operation permissions, PII detection, and audit logging
|
||||
// for compliance with German data protection regulations.
|
||||
package policy
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// =============================================================================
|
||||
// ENUMS AND CONSTANTS
|
||||
// =============================================================================
|
||||
|
||||
// Bundesland is the two-letter code of a German federal state.
type Bundesland string

// Two-letter codes of the German federal states.
const (
	BundeslandBW = Bundesland("BW") // Baden-Wuerttemberg
	BundeslandBY = Bundesland("BY") // Bayern
	BundeslandBE = Bundesland("BE") // Berlin
	BundeslandBB = Bundesland("BB") // Brandenburg
	BundeslandHB = Bundesland("HB") // Bremen
	BundeslandHH = Bundesland("HH") // Hamburg
	BundeslandHE = Bundesland("HE") // Hessen
	BundeslandMV = Bundesland("MV") // Mecklenburg-Vorpommern
	BundeslandNI = Bundesland("NI") // Niedersachsen
	BundeslandNW = Bundesland("NW") // Nordrhein-Westfalen
	BundeslandRP = Bundesland("RP") // Rheinland-Pfalz
	BundeslandSL = Bundesland("SL") // Saarland
	BundeslandSN = Bundesland("SN") // Sachsen
	BundeslandST = Bundesland("ST") // Sachsen-Anhalt
	BundeslandSH = Bundesland("SH") // Schleswig-Holstein
	BundeslandTH = Bundesland("TH") // Thueringen
)

// ValidBundeslaender lists every Bundesland code, in declaration order.
var ValidBundeslaender = []Bundesland{
	BundeslandBW,
	BundeslandBY,
	BundeslandBE,
	BundeslandBB,
	BundeslandHB,
	BundeslandHH,
	BundeslandHE,
	BundeslandMV,
	BundeslandNI,
	BundeslandNW,
	BundeslandRP,
	BundeslandSL,
	BundeslandSN,
	BundeslandST,
	BundeslandSH,
	BundeslandTH,
}
|
||||
|
||||
// License names the license under which a data source may be used.
type License string

// License identifiers accepted for allowed sources.
const (
	LicenseDLDEBY20   License = "DL-DE-BY-2.0" // Datenlizenz Deutschland - Namensnennung
	LicenseCCBY       License = "CC-BY"        // Creative Commons Attribution
	LicenseCCBYSA     License = "CC-BY-SA"     // Creative Commons Attribution-ShareAlike
	LicenseCCBYNC     License = "CC-BY-NC"     // Creative Commons Attribution-NonCommercial
	LicenseCCBYNCSA   License = "CC-BY-NC-SA"  // Creative Commons Attribution-NonCommercial-ShareAlike
	LicenseCC0        License = "CC0"          // Public Domain
	LicenseParagraph5 License = "§5 UrhG"      // Official works (German Copyright Act)
	LicenseCustom     License = "Custom"       // Custom license (requires legal basis)
)
|
||||
|
||||
// Operation names a kind of processing that can be performed on source data.
type Operation string

// Operations covered by the per-source permission matrix.
const (
	OperationLookup   Operation = "lookup"   // Display/Search
	OperationRAG      Operation = "rag"      // RAG (Retrieval-Augmented Generation)
	OperationTraining Operation = "training" // Model training (forbidden by default)
	OperationExport   Operation = "export"   // Data export
)
|
||||
|
||||
// ValidOperations contains all valid operation types.
|
||||
var ValidOperations = []Operation{
|
||||
OperationLookup,
|
||||
OperationRAG,
|
||||
OperationTraining,
|
||||
OperationExport,
|
||||
}
|
||||
|
||||
// PIIRuleType selects how a PII rule's pattern is interpreted.
type PIIRuleType string

// Supported PII rule types.
const (
	PIIRuleTypeRegex   PIIRuleType = "regex"   // Regular expression pattern
	PIIRuleTypeKeyword PIIRuleType = "keyword" // Keyword matching
)
|
||||
|
||||
// PIISeverity is the severity level attached to a PII rule and its matches.
type PIISeverity string

// Supported severity levels.
const (
	PIISeverityBlock  PIISeverity = "block"  // Block content completely
	PIISeverityWarn   PIISeverity = "warn"   // Warn but allow
	PIISeverityRedact PIISeverity = "redact" // Redact matched content
)
|
||||
|
||||
// AuditAction names the kind of change recorded in an audit log entry.
type AuditAction string

// Audit actions.
const (
	AuditActionCreate     AuditAction = "create"
	AuditActionUpdate     AuditAction = "update"
	AuditActionDelete     AuditAction = "delete"
	AuditActionActivate   AuditAction = "activate"
	AuditActionDeactivate AuditAction = "deactivate"
	AuditActionApprove    AuditAction = "approve"
)
|
||||
|
||||
// AuditEntityType names the kind of entity an audit log entry refers to.
type AuditEntityType string

// Auditable entity types.
const (
	AuditEntitySourcePolicy        AuditEntityType = "source_policy"
	AuditEntityAllowedSource       AuditEntityType = "allowed_source"
	AuditEntityOperationPermission AuditEntityType = "operation_permission"
	AuditEntityPIIRule             AuditEntityType = "pii_rule"
)
|
||||
|
||||
// BlockReason names the reason why a piece of content was blocked.
type BlockReason string

// Block reasons.
const (
	BlockReasonNotWhitelisted    BlockReason = "not_whitelisted"
	BlockReasonPIIDetected       BlockReason = "pii_detected"
	BlockReasonTrainingForbidden BlockReason = "training_forbidden"
	BlockReasonLicenseViolation  BlockReason = "license_violation"
	BlockReasonManualBlock       BlockReason = "manual_block"
)
|
||||
|
||||
// =============================================================================
|
||||
// CORE MODELS
|
||||
// =============================================================================
|
||||
|
||||
// SourcePolicy represents a versioned policy for data source management.
|
||||
// Policies can be scoped to a specific Bundesland or apply federally (bundesland = nil).
|
||||
type SourcePolicy struct {
|
||||
ID uuid.UUID `json:"id" db:"id"`
|
||||
Version int `json:"version" db:"version"`
|
||||
Name string `json:"name" db:"name"`
|
||||
Description *string `json:"description,omitempty" db:"description"`
|
||||
Bundesland *Bundesland `json:"bundesland,omitempty" db:"bundesland"`
|
||||
IsActive bool `json:"is_active" db:"is_active"`
|
||||
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
||||
ApprovedBy *uuid.UUID `json:"approved_by,omitempty" db:"approved_by"`
|
||||
ApprovedAt *time.Time `json:"approved_at,omitempty" db:"approved_at"`
|
||||
|
||||
// Joined fields (populated by queries)
|
||||
Sources []AllowedSource `json:"sources,omitempty"`
|
||||
}
|
||||
|
||||
// AllowedSource represents a whitelisted data source with license information.
|
||||
type AllowedSource struct {
|
||||
ID uuid.UUID `json:"id" db:"id"`
|
||||
PolicyID uuid.UUID `json:"policy_id" db:"policy_id"`
|
||||
Domain string `json:"domain" db:"domain"`
|
||||
Name string `json:"name" db:"name"`
|
||||
Description *string `json:"description,omitempty" db:"description"`
|
||||
License License `json:"license" db:"license"`
|
||||
LegalBasis *string `json:"legal_basis,omitempty" db:"legal_basis"`
|
||||
CitationTemplate *string `json:"citation_template,omitempty" db:"citation_template"`
|
||||
TrustBoost float64 `json:"trust_boost" db:"trust_boost"`
|
||||
IsActive bool `json:"is_active" db:"is_active"`
|
||||
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
||||
|
||||
// Joined fields (populated by queries)
|
||||
Operations []OperationPermission `json:"operations,omitempty"`
|
||||
PolicyName *string `json:"policy_name,omitempty"`
|
||||
}
|
||||
|
||||
// OperationPermission represents the permission matrix for a specific source.
|
||||
type OperationPermission struct {
|
||||
ID uuid.UUID `json:"id" db:"id"`
|
||||
SourceID uuid.UUID `json:"source_id" db:"source_id"`
|
||||
Operation Operation `json:"operation" db:"operation"`
|
||||
IsAllowed bool `json:"is_allowed" db:"is_allowed"`
|
||||
RequiresCitation bool `json:"requires_citation" db:"requires_citation"`
|
||||
Notes *string `json:"notes,omitempty" db:"notes"`
|
||||
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
||||
}
|
||||
|
||||
// PIIRule represents a rule for detecting personally identifiable information.
|
||||
type PIIRule struct {
|
||||
ID uuid.UUID `json:"id" db:"id"`
|
||||
Name string `json:"name" db:"name"`
|
||||
Description *string `json:"description,omitempty" db:"description"`
|
||||
RuleType PIIRuleType `json:"rule_type" db:"rule_type"`
|
||||
Pattern string `json:"pattern" db:"pattern"`
|
||||
Severity PIISeverity `json:"severity" db:"severity"`
|
||||
IsActive bool `json:"is_active" db:"is_active"`
|
||||
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// AUDIT AND LOGGING MODELS
|
||||
// =============================================================================
|
||||
|
||||
// PolicyAuditLog represents an immutable audit log entry for policy changes.
|
||||
type PolicyAuditLog struct {
|
||||
ID uuid.UUID `json:"id" db:"id"`
|
||||
Action AuditAction `json:"action" db:"action"`
|
||||
EntityType AuditEntityType `json:"entity_type" db:"entity_type"`
|
||||
EntityID *uuid.UUID `json:"entity_id,omitempty" db:"entity_id"`
|
||||
OldValue json.RawMessage `json:"old_value,omitempty" db:"old_value"`
|
||||
NewValue json.RawMessage `json:"new_value,omitempty" db:"new_value"`
|
||||
UserID *uuid.UUID `json:"user_id,omitempty" db:"user_id"`
|
||||
UserEmail *string `json:"user_email,omitempty" db:"user_email"`
|
||||
IPAddress *string `json:"ip_address,omitempty" db:"ip_address"`
|
||||
UserAgent *string `json:"user_agent,omitempty" db:"user_agent"`
|
||||
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
||||
}
|
||||
|
||||
// BlockedContentLog represents a log entry for blocked URLs.
|
||||
type BlockedContentLog struct {
|
||||
ID uuid.UUID `json:"id" db:"id"`
|
||||
URL string `json:"url" db:"url"`
|
||||
Domain string `json:"domain" db:"domain"`
|
||||
BlockReason BlockReason `json:"block_reason" db:"block_reason"`
|
||||
MatchedRuleID *uuid.UUID `json:"matched_rule_id,omitempty" db:"matched_rule_id"`
|
||||
Details json.RawMessage `json:"details,omitempty" db:"details"`
|
||||
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// REQUEST/RESPONSE MODELS
|
||||
// =============================================================================
|
||||
|
||||
// CreateSourcePolicyRequest represents a request to create a new policy.
|
||||
type CreateSourcePolicyRequest struct {
|
||||
Name string `json:"name" binding:"required"`
|
||||
Description *string `json:"description"`
|
||||
Bundesland *Bundesland `json:"bundesland"`
|
||||
}
|
||||
|
||||
// UpdateSourcePolicyRequest represents a request to update a policy.
|
||||
type UpdateSourcePolicyRequest struct {
|
||||
Name *string `json:"name"`
|
||||
Description *string `json:"description"`
|
||||
Bundesland *Bundesland `json:"bundesland"`
|
||||
IsActive *bool `json:"is_active"`
|
||||
}
|
||||
|
||||
// CreateAllowedSourceRequest represents a request to create a new allowed source.
|
||||
type CreateAllowedSourceRequest struct {
|
||||
PolicyID uuid.UUID `json:"policy_id" binding:"required"`
|
||||
Domain string `json:"domain" binding:"required"`
|
||||
Name string `json:"name" binding:"required"`
|
||||
Description *string `json:"description"`
|
||||
License License `json:"license" binding:"required"`
|
||||
LegalBasis *string `json:"legal_basis"`
|
||||
CitationTemplate *string `json:"citation_template"`
|
||||
TrustBoost *float64 `json:"trust_boost"`
|
||||
}
|
||||
|
||||
// UpdateAllowedSourceRequest represents a request to update an allowed source.
|
||||
type UpdateAllowedSourceRequest struct {
|
||||
Domain *string `json:"domain"`
|
||||
Name *string `json:"name"`
|
||||
Description *string `json:"description"`
|
||||
License *License `json:"license"`
|
||||
LegalBasis *string `json:"legal_basis"`
|
||||
CitationTemplate *string `json:"citation_template"`
|
||||
TrustBoost *float64 `json:"trust_boost"`
|
||||
IsActive *bool `json:"is_active"`
|
||||
}
|
||||
|
||||
// UpdateOperationPermissionRequest is the request body for updating an
// operation permission. All fields are pointers, so an absent field decodes
// to nil.
type UpdateOperationPermissionRequest struct {
	IsAllowed        *bool   `json:"is_allowed"`
	RequiresCitation *bool   `json:"requires_citation"`
	Notes            *string `json:"notes"`
}
|
||||
|
||||
// CreatePIIRuleRequest represents a request to create a new PII rule.
|
||||
type CreatePIIRuleRequest struct {
|
||||
Name string `json:"name" binding:"required"`
|
||||
Description *string `json:"description"`
|
||||
RuleType PIIRuleType `json:"rule_type" binding:"required"`
|
||||
Pattern string `json:"pattern" binding:"required"`
|
||||
Severity PIISeverity `json:"severity"`
|
||||
}
|
||||
|
||||
// UpdatePIIRuleRequest represents a request to update a PII rule.
|
||||
type UpdatePIIRuleRequest struct {
|
||||
Name *string `json:"name"`
|
||||
Description *string `json:"description"`
|
||||
RuleType *PIIRuleType `json:"rule_type"`
|
||||
Pattern *string `json:"pattern"`
|
||||
Severity *PIISeverity `json:"severity"`
|
||||
IsActive *bool `json:"is_active"`
|
||||
}
|
||||
|
||||
// CheckComplianceRequest represents a request to check URL compliance.
|
||||
type CheckComplianceRequest struct {
|
||||
URL string `json:"url" binding:"required"`
|
||||
Operation Operation `json:"operation" binding:"required"`
|
||||
Bundesland *Bundesland `json:"bundesland"`
|
||||
}
|
||||
|
||||
// CheckComplianceResponse represents the compliance check result.
|
||||
type CheckComplianceResponse struct {
|
||||
IsAllowed bool `json:"is_allowed"`
|
||||
Source *AllowedSource `json:"source,omitempty"`
|
||||
BlockReason *BlockReason `json:"block_reason,omitempty"`
|
||||
RequiresCitation bool `json:"requires_citation"`
|
||||
CitationTemplate *string `json:"citation_template,omitempty"`
|
||||
License *License `json:"license,omitempty"`
|
||||
}
|
||||
|
||||
// PIITestRequest is the request body for testing PII detection on a text.
type PIITestRequest struct {
	Text string `json:"text" binding:"required"`
}
|
||||
|
||||
// PIIMatch represents a single PII match in text.
|
||||
type PIIMatch struct {
|
||||
RuleID uuid.UUID `json:"rule_id"`
|
||||
RuleName string `json:"rule_name"`
|
||||
RuleType PIIRuleType `json:"rule_type"`
|
||||
Severity PIISeverity `json:"severity"`
|
||||
Match string `json:"match"`
|
||||
StartIndex int `json:"start_index"`
|
||||
EndIndex int `json:"end_index"`
|
||||
}
|
||||
|
||||
// PIITestResponse represents the result of PII detection test.
|
||||
type PIITestResponse struct {
|
||||
HasPII bool `json:"has_pii"`
|
||||
Matches []PIIMatch `json:"matches"`
|
||||
BlockLevel PIISeverity `json:"block_level"`
|
||||
ShouldBlock bool `json:"should_block"`
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// LIST/FILTER MODELS
|
||||
// =============================================================================
|
||||
|
||||
// PolicyListFilter represents filters for listing policies.
|
||||
type PolicyListFilter struct {
|
||||
Bundesland *Bundesland `form:"bundesland"`
|
||||
IsActive *bool `form:"is_active"`
|
||||
Limit int `form:"limit"`
|
||||
Offset int `form:"offset"`
|
||||
}
|
||||
|
||||
// SourceListFilter represents filters for listing sources.
|
||||
type SourceListFilter struct {
|
||||
PolicyID *uuid.UUID `form:"policy_id"`
|
||||
Domain *string `form:"domain"`
|
||||
License *License `form:"license"`
|
||||
IsActive *bool `form:"is_active"`
|
||||
Limit int `form:"limit"`
|
||||
Offset int `form:"offset"`
|
||||
}
|
||||
|
||||
// AuditLogFilter represents filters for querying audit logs.
|
||||
type AuditLogFilter struct {
|
||||
EntityType *AuditEntityType `form:"entity_type"`
|
||||
EntityID *uuid.UUID `form:"entity_id"`
|
||||
Action *AuditAction `form:"action"`
|
||||
UserEmail *string `form:"user_email"`
|
||||
FromDate *time.Time `form:"from"`
|
||||
ToDate *time.Time `form:"to"`
|
||||
Limit int `form:"limit"`
|
||||
Offset int `form:"offset"`
|
||||
}
|
||||
|
||||
// BlockedContentFilter represents filters for querying blocked content logs.
|
||||
type BlockedContentFilter struct {
|
||||
Domain *string `form:"domain"`
|
||||
BlockReason *BlockReason `form:"block_reason"`
|
||||
FromDate *time.Time `form:"from"`
|
||||
ToDate *time.Time `form:"to"`
|
||||
Limit int `form:"limit"`
|
||||
Offset int `form:"offset"`
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// STATISTICS MODELS
|
||||
// =============================================================================
|
||||
|
||||
// PolicyStats holds aggregated statistics for the policy system: counters
// for policies, sources, blocks and PII rules, per-license and per-reason
// breakdowns keyed by string, and a compliance score.
type PolicyStats struct {
	ActivePolicies   int            `json:"active_policies"`
	TotalSources     int            `json:"total_sources"`
	ActiveSources    int            `json:"active_sources"`
	BlockedToday     int            `json:"blocked_today"`
	BlockedTotal     int            `json:"blocked_total"`
	PIIRulesActive   int            `json:"pii_rules_active"`
	SourcesByLicense map[string]int `json:"sources_by_license"`
	BlocksByReason   map[string]int `json:"blocks_by_reason"`
	ComplianceScore  float64        `json:"compliance_score"`
}
|
||||
|
||||
// =============================================================================
|
||||
// YAML CONFIGURATION MODELS
|
||||
// =============================================================================
|
||||
|
||||
// BundeslaenderConfig represents the YAML configuration for initial data loading.
|
||||
type BundeslaenderConfig struct {
|
||||
Federal PolicyConfig `yaml:"federal"`
|
||||
Bundeslaender map[string]PolicyConfig `yaml:",inline"`
|
||||
DefaultOperations OperationsConfig `yaml:"default_operations"`
|
||||
PIIRules []PIIRuleConfig `yaml:"pii_rules"`
|
||||
}
|
||||
|
||||
// PolicyConfig represents a policy configuration in YAML.
|
||||
type PolicyConfig struct {
|
||||
Name string `yaml:"name"`
|
||||
Sources []SourceConfig `yaml:"sources"`
|
||||
}
|
||||
|
||||
// SourceConfig is the YAML form of an allowed source. License is kept as a
// plain string here; LegalBasis, CitationTemplate and TrustBoost are optional
// and omitted when empty.
type SourceConfig struct {
	Domain           string  `yaml:"domain"`
	Name             string  `yaml:"name"`
	License          string  `yaml:"license"`
	LegalBasis       string  `yaml:"legal_basis,omitempty"`
	CitationTemplate string  `yaml:"citation_template,omitempty"`
	TrustBoost       float64 `yaml:"trust_boost,omitempty"`
}
|
||||
|
||||
// OperationsConfig represents default operation permissions in YAML.
|
||||
type OperationsConfig struct {
|
||||
Lookup OperationConfig `yaml:"lookup"`
|
||||
RAG OperationConfig `yaml:"rag"`
|
||||
Training OperationConfig `yaml:"training"`
|
||||
Export OperationConfig `yaml:"export"`
|
||||
}
|
||||
|
||||
// OperationConfig is the YAML form of a single operation permission.
type OperationConfig struct {
	Allowed          bool `yaml:"allowed"`
	RequiresCitation bool `yaml:"requires_citation"`
}
|
||||
|
||||
// PIIRuleConfig is the YAML form of a PII rule. Type and Severity are kept
// as plain strings here.
type PIIRuleConfig struct {
	Name     string `yaml:"name"`
	Type     string `yaml:"type"`
	Pattern  string `yaml:"pattern"`
	Severity string `yaml:"severity"`
}
|
||||
350
edu-search-service/internal/policy/pii_detector.go
Normal file
350
edu-search-service/internal/policy/pii_detector.go
Normal file
@@ -0,0 +1,350 @@
|
||||
package policy
|
||||
|
||||
import (
|
||||
"context"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// PIIDetector detects personally identifiable information in text.
|
||||
type PIIDetector struct {
|
||||
store *Store
|
||||
compiledRules map[string]*regexp.Regexp
|
||||
rulesMu sync.RWMutex
|
||||
}
|
||||
|
||||
// NewPIIDetector creates a new PIIDetector instance.
|
||||
func NewPIIDetector(store *Store) *PIIDetector {
|
||||
return &PIIDetector{
|
||||
store: store,
|
||||
compiledRules: make(map[string]*regexp.Regexp),
|
||||
}
|
||||
}
|
||||
|
||||
// Detect scans text for PII patterns and returns all matches.
|
||||
func (d *PIIDetector) Detect(ctx context.Context, text string) (*PIITestResponse, error) {
|
||||
rules, err := d.store.ListPIIRules(ctx, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
response := &PIITestResponse{
|
||||
HasPII: false,
|
||||
Matches: []PIIMatch{},
|
||||
ShouldBlock: false,
|
||||
}
|
||||
|
||||
highestSeverity := PIISeverity("")
|
||||
|
||||
for _, rule := range rules {
|
||||
matches := d.findMatches(text, &rule)
|
||||
if len(matches) > 0 {
|
||||
response.HasPII = true
|
||||
response.Matches = append(response.Matches, matches...)
|
||||
|
||||
// Track highest severity
|
||||
if compareSeverity(rule.Severity, highestSeverity) > 0 {
|
||||
highestSeverity = rule.Severity
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
response.BlockLevel = highestSeverity
|
||||
response.ShouldBlock = highestSeverity == PIISeverityBlock
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
// findMatches finds all matches for a single rule in the text.
|
||||
func (d *PIIDetector) findMatches(text string, rule *PIIRule) []PIIMatch {
|
||||
var matches []PIIMatch
|
||||
|
||||
switch rule.RuleType {
|
||||
case PIIRuleTypeRegex:
|
||||
matches = d.findRegexMatches(text, rule)
|
||||
case PIIRuleTypeKeyword:
|
||||
matches = d.findKeywordMatches(text, rule)
|
||||
}
|
||||
|
||||
return matches
|
||||
}
|
||||
|
||||
// findRegexMatches finds all regex pattern matches in text.
|
||||
func (d *PIIDetector) findRegexMatches(text string, rule *PIIRule) []PIIMatch {
|
||||
re := d.getCompiledRegex(rule.ID.String(), rule.Pattern)
|
||||
if re == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var matches []PIIMatch
|
||||
allMatches := re.FindAllStringIndex(text, -1)
|
||||
|
||||
for _, loc := range allMatches {
|
||||
matches = append(matches, PIIMatch{
|
||||
RuleID: rule.ID,
|
||||
RuleName: rule.Name,
|
||||
RuleType: rule.RuleType,
|
||||
Severity: rule.Severity,
|
||||
Match: text[loc[0]:loc[1]],
|
||||
StartIndex: loc[0],
|
||||
EndIndex: loc[1],
|
||||
})
|
||||
}
|
||||
|
||||
return matches
|
||||
}
|
||||
|
||||
// findKeywordMatches finds all keyword matches in text (case-insensitive).
|
||||
func (d *PIIDetector) findKeywordMatches(text string, rule *PIIRule) []PIIMatch {
|
||||
var matches []PIIMatch
|
||||
lowerText := strings.ToLower(text)
|
||||
|
||||
// Split pattern by commas or pipes for multiple keywords
|
||||
keywords := strings.FieldsFunc(rule.Pattern, func(r rune) bool {
|
||||
return r == ',' || r == '|'
|
||||
})
|
||||
|
||||
for _, keyword := range keywords {
|
||||
keyword = strings.TrimSpace(keyword)
|
||||
if keyword == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
lowerKeyword := strings.ToLower(keyword)
|
||||
startIdx := 0
|
||||
|
||||
for {
|
||||
idx := strings.Index(lowerText[startIdx:], lowerKeyword)
|
||||
if idx == -1 {
|
||||
break
|
||||
}
|
||||
|
||||
actualIdx := startIdx + idx
|
||||
matches = append(matches, PIIMatch{
|
||||
RuleID: rule.ID,
|
||||
RuleName: rule.Name,
|
||||
RuleType: rule.RuleType,
|
||||
Severity: rule.Severity,
|
||||
Match: text[actualIdx : actualIdx+len(keyword)],
|
||||
StartIndex: actualIdx,
|
||||
EndIndex: actualIdx + len(keyword),
|
||||
})
|
||||
|
||||
startIdx = actualIdx + len(keyword)
|
||||
}
|
||||
}
|
||||
|
||||
return matches
|
||||
}
|
||||
|
||||
// getCompiledRegex returns a compiled regex, caching for performance.
|
||||
func (d *PIIDetector) getCompiledRegex(ruleID, pattern string) *regexp.Regexp {
|
||||
d.rulesMu.RLock()
|
||||
re, ok := d.compiledRules[ruleID]
|
||||
d.rulesMu.RUnlock()
|
||||
|
||||
if ok {
|
||||
return re
|
||||
}
|
||||
|
||||
// Compile and cache
|
||||
d.rulesMu.Lock()
|
||||
defer d.rulesMu.Unlock()
|
||||
|
||||
// Double-check after acquiring write lock
|
||||
if re, ok = d.compiledRules[ruleID]; ok {
|
||||
return re
|
||||
}
|
||||
|
||||
compiled, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
// Invalid regex - don't cache
|
||||
return nil
|
||||
}
|
||||
|
||||
d.compiledRules[ruleID] = compiled
|
||||
return compiled
|
||||
}
|
||||
|
||||
// ClearCache clears the compiled regex cache (call after rule updates).
|
||||
func (d *PIIDetector) ClearCache() {
|
||||
d.rulesMu.Lock()
|
||||
defer d.rulesMu.Unlock()
|
||||
d.compiledRules = make(map[string]*regexp.Regexp)
|
||||
}
|
||||
|
||||
// RefreshRules reloads rules and clears the cache.
|
||||
func (d *PIIDetector) RefreshRules() {
|
||||
d.ClearCache()
|
||||
}
|
||||
|
||||
// compareSeverity compares two severity levels.
|
||||
// Returns: 1 if a > b, -1 if a < b, 0 if equal.
|
||||
func compareSeverity(a, b PIISeverity) int {
|
||||
severityOrder := map[PIISeverity]int{
|
||||
"": 0,
|
||||
PIISeverityWarn: 1,
|
||||
PIISeverityRedact: 2,
|
||||
PIISeverityBlock: 3,
|
||||
}
|
||||
|
||||
aOrder := severityOrder[a]
|
||||
bOrder := severityOrder[b]
|
||||
|
||||
if aOrder > bOrder {
|
||||
return 1
|
||||
} else if aOrder < bOrder {
|
||||
return -1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// PREDEFINED PII PATTERNS (German Context)
|
||||
// =============================================================================
|
||||
|
||||
// DefaultPIIRules returns a set of default PII detection rules for German context.
|
||||
func DefaultPIIRules() []PIIRuleConfig {
|
||||
return []PIIRuleConfig{
|
||||
// Email Addresses
|
||||
{
|
||||
Name: "Email Addresses",
|
||||
Type: "regex",
|
||||
Pattern: `[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`,
|
||||
Severity: "block",
|
||||
},
|
||||
// German Phone Numbers
|
||||
{
|
||||
Name: "German Phone Numbers",
|
||||
Type: "regex",
|
||||
Pattern: `(?:\+49|0)[\s.-]?\d{2,4}[\s.-]?\d{3,}[\s.-]?\d{2,}`,
|
||||
Severity: "block",
|
||||
},
|
||||
// German Mobile Numbers
|
||||
{
|
||||
Name: "German Mobile Numbers",
|
||||
Type: "regex",
|
||||
Pattern: `(?:\+49|0)1[567]\d[\s.-]?\d{3,}[\s.-]?\d{2,}`,
|
||||
Severity: "block",
|
||||
},
|
||||
// IBAN (German)
|
||||
{
|
||||
Name: "German IBAN",
|
||||
Type: "regex",
|
||||
Pattern: `DE\d{2}\s?\d{4}\s?\d{4}\s?\d{4}\s?\d{4}\s?\d{2}`,
|
||||
Severity: "block",
|
||||
},
|
||||
// German Social Security Number (Sozialversicherungsnummer)
|
||||
{
|
||||
Name: "German Social Security Number",
|
||||
Type: "regex",
|
||||
Pattern: `\d{2}[0-3]\d[01]\d{2}[A-Z]\d{3}`,
|
||||
Severity: "block",
|
||||
},
|
||||
// German Tax ID (Steuer-ID)
|
||||
{
|
||||
Name: "German Tax ID",
|
||||
Type: "regex",
|
||||
Pattern: `\d{2}\s?\d{3}\s?\d{3}\s?\d{3}`,
|
||||
Severity: "block",
|
||||
},
|
||||
// Credit Card Numbers (Luhn-compatible patterns)
|
||||
{
|
||||
Name: "Credit Card Numbers",
|
||||
Type: "regex",
|
||||
Pattern: `(?:\d{4}[\s.-]?){3}\d{4}`,
|
||||
Severity: "block",
|
||||
},
|
||||
// German Postal Code + City Pattern (potential address)
|
||||
{
|
||||
Name: "German Address Pattern",
|
||||
Type: "regex",
|
||||
Pattern: `\d{5}\s+[A-ZÄÖÜ][a-zäöüß]+`,
|
||||
Severity: "warn",
|
||||
},
|
||||
// Date of Birth Patterns (DD.MM.YYYY)
|
||||
{
|
||||
Name: "Date of Birth",
|
||||
Type: "regex",
|
||||
Pattern: `(?:geboren|geb\.|Geburtsdatum|DoB)[\s:]*\d{1,2}[\./]\d{1,2}[\./]\d{2,4}`,
|
||||
Severity: "warn",
|
||||
},
|
||||
// Personal Names with Titles
|
||||
{
|
||||
Name: "Personal Names with Titles",
|
||||
Type: "regex",
|
||||
Pattern: `(?:Herr|Frau|Dr\.|Prof\.)\s+[A-ZÄÖÜ][a-zäöüß]+\s+[A-ZÄÖÜ][a-zäöüß]+`,
|
||||
Severity: "warn",
|
||||
},
|
||||
// German Health Insurance Number
|
||||
{
|
||||
Name: "Health Insurance Number",
|
||||
Type: "regex",
|
||||
Pattern: `[A-Z]\d{9}`,
|
||||
Severity: "block",
|
||||
},
|
||||
// Vehicle Registration (German)
|
||||
{
|
||||
Name: "German Vehicle Registration",
|
||||
Type: "regex",
|
||||
Pattern: `[A-ZÄÖÜ]{1,3}[\s-]?[A-Z]{1,2}[\s-]?\d{1,4}[HE]?`,
|
||||
Severity: "warn",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// REDACTION
|
||||
// =============================================================================
|
||||
|
||||
// RedactText redacts PII from text based on the matches.
|
||||
func (d *PIIDetector) RedactText(text string, matches []PIIMatch) string {
|
||||
if len(matches) == 0 {
|
||||
return text
|
||||
}
|
||||
|
||||
// Sort matches by start index (descending) to replace from end
|
||||
sortedMatches := make([]PIIMatch, len(matches))
|
||||
copy(sortedMatches, matches)
|
||||
|
||||
// Simple bubble sort for small number of matches
|
||||
for i := 0; i < len(sortedMatches)-1; i++ {
|
||||
for j := 0; j < len(sortedMatches)-i-1; j++ {
|
||||
if sortedMatches[j].StartIndex < sortedMatches[j+1].StartIndex {
|
||||
sortedMatches[j], sortedMatches[j+1] = sortedMatches[j+1], sortedMatches[j]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result := text
|
||||
for _, match := range sortedMatches {
|
||||
if match.Severity == PIISeverityRedact || match.Severity == PIISeverityBlock {
|
||||
replacement := strings.Repeat("*", match.EndIndex-match.StartIndex)
|
||||
result = result[:match.StartIndex] + replacement + result[match.EndIndex:]
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// FilterContent filters content based on PII detection.
|
||||
// Returns the filtered content and whether it should be blocked.
|
||||
func (d *PIIDetector) FilterContent(ctx context.Context, content string) (string, bool, error) {
|
||||
response, err := d.Detect(ctx, content)
|
||||
if err != nil {
|
||||
return content, false, err
|
||||
}
|
||||
|
||||
if !response.HasPII {
|
||||
return content, false, nil
|
||||
}
|
||||
|
||||
if response.ShouldBlock {
|
||||
return "", true, nil
|
||||
}
|
||||
|
||||
// Redact content for warn/redact severity
|
||||
redacted := d.RedactText(content, response.Matches)
|
||||
return redacted, false, nil
|
||||
}
|
||||
489
edu-search-service/internal/policy/policy_test.go
Normal file
489
edu-search-service/internal/policy/policy_test.go
Normal file
@@ -0,0 +1,489 @@
|
||||
package policy
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// =============================================================================
|
||||
// MODEL TESTS
|
||||
// =============================================================================
|
||||
|
||||
func TestBundeslandValidation(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
bl Bundesland
|
||||
expected bool
|
||||
}{
|
||||
{"valid NI", BundeslandNI, true},
|
||||
{"valid BY", BundeslandBY, true},
|
||||
{"valid BW", BundeslandBW, true},
|
||||
{"valid NW", BundeslandNW, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
found := false
|
||||
for _, valid := range ValidBundeslaender {
|
||||
if valid == tt.bl {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if found != tt.expected {
|
||||
t.Errorf("Expected %v to be valid=%v, got valid=%v", tt.bl, tt.expected, found)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLicenseValues(t *testing.T) {
|
||||
licenses := []License{
|
||||
LicenseDLDEBY20,
|
||||
LicenseCCBY,
|
||||
LicenseCCBYSA,
|
||||
LicenseCC0,
|
||||
LicenseParagraph5,
|
||||
}
|
||||
|
||||
for _, l := range licenses {
|
||||
if l == "" {
|
||||
t.Errorf("License should not be empty")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestOperationValues(t *testing.T) {
|
||||
if len(ValidOperations) != 4 {
|
||||
t.Errorf("Expected 4 operations, got %d", len(ValidOperations))
|
||||
}
|
||||
|
||||
expectedOps := []Operation{OperationLookup, OperationRAG, OperationTraining, OperationExport}
|
||||
for _, expected := range expectedOps {
|
||||
found := false
|
||||
for _, op := range ValidOperations {
|
||||
if op == expected {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("Expected operation %s not found in ValidOperations", expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// PII DETECTOR TESTS
|
||||
// =============================================================================
|
||||
|
||||
func TestPIIDetector_EmailDetection(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
hasEmail bool
|
||||
}{
|
||||
{"simple email", "Contact: test@example.com", true},
|
||||
{"email with plus", "Email: user+tag@domain.org", true},
|
||||
{"no email", "This is plain text", false},
|
||||
{"partial email", "user@ is not an email", false},
|
||||
{"multiple emails", "Send to a@b.com and x@y.de", true},
|
||||
}
|
||||
|
||||
// Test using regex pattern directly since we don't have a store
|
||||
emailPattern := `[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Simple test without database
|
||||
rule := &PIIRule{
|
||||
Name: "Email",
|
||||
RuleType: PIIRuleTypeRegex,
|
||||
Pattern: emailPattern,
|
||||
Severity: PIISeverityBlock,
|
||||
}
|
||||
|
||||
detector := &PIIDetector{
|
||||
compiledRules: make(map[string]*regexp.Regexp),
|
||||
}
|
||||
|
||||
matches := detector.findMatches(tt.text, rule)
|
||||
hasMatch := len(matches) > 0
|
||||
|
||||
if hasMatch != tt.hasEmail {
|
||||
t.Errorf("Expected hasEmail=%v, got %v for text: %s", tt.hasEmail, hasMatch, tt.text)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPIIDetector_PhoneDetection(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
hasPhone bool
|
||||
}{
|
||||
{"german mobile", "Call +49 170 1234567", true},
|
||||
{"german landline", "Tel: 030-12345678", true},
|
||||
{"with spaces", "Phone: 0170 123 4567", true},
|
||||
{"no phone", "This is just text", false},
|
||||
{"US format", "Call 555-123-4567", false}, // Should not match German pattern
|
||||
}
|
||||
|
||||
phonePattern := `(?:\+49|0)[\s.-]?\d{2,4}[\s.-]?\d{3,}[\s.-]?\d{2,}`
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rule := &PIIRule{
|
||||
Name: "Phone",
|
||||
RuleType: PIIRuleTypeRegex,
|
||||
Pattern: phonePattern,
|
||||
Severity: PIISeverityBlock,
|
||||
}
|
||||
|
||||
detector := &PIIDetector{
|
||||
compiledRules: make(map[string]*regexp.Regexp),
|
||||
}
|
||||
|
||||
matches := detector.findMatches(tt.text, rule)
|
||||
hasMatch := len(matches) > 0
|
||||
|
||||
if hasMatch != tt.hasPhone {
|
||||
t.Errorf("Expected hasPhone=%v, got %v for text: %s", tt.hasPhone, hasMatch, tt.text)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPIIDetector_IBANDetection(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
hasIBAN bool
|
||||
}{
|
||||
{"valid IBAN", "IBAN: DE89 3704 0044 0532 0130 00", true},
|
||||
{"compact IBAN", "DE89370400440532013000", true},
|
||||
{"no IBAN", "Just a number: 12345678", false},
|
||||
{"partial", "DE12 is not complete", false},
|
||||
}
|
||||
|
||||
ibanPattern := `DE\d{2}\s?\d{4}\s?\d{4}\s?\d{4}\s?\d{4}\s?\d{2}`
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rule := &PIIRule{
|
||||
Name: "IBAN",
|
||||
RuleType: PIIRuleTypeRegex,
|
||||
Pattern: ibanPattern,
|
||||
Severity: PIISeverityBlock,
|
||||
}
|
||||
|
||||
detector := &PIIDetector{
|
||||
compiledRules: make(map[string]*regexp.Regexp),
|
||||
}
|
||||
|
||||
matches := detector.findMatches(tt.text, rule)
|
||||
hasMatch := len(matches) > 0
|
||||
|
||||
if hasMatch != tt.hasIBAN {
|
||||
t.Errorf("Expected hasIBAN=%v, got %v for text: %s", tt.hasIBAN, hasMatch, tt.text)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPIIDetector_KeywordMatching(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
keywords string
|
||||
expected int
|
||||
}{
|
||||
{"single keyword", "The password is secret", "password", 1},
|
||||
{"multiple keywords", "Password and secret", "password,secret", 2},
|
||||
{"case insensitive", "PASSWORD and Secret", "password,secret", 2},
|
||||
{"no match", "This is safe text", "password,secret", 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rule := &PIIRule{
|
||||
Name: "Keywords",
|
||||
RuleType: PIIRuleTypeKeyword,
|
||||
Pattern: tt.keywords,
|
||||
Severity: PIISeverityWarn,
|
||||
}
|
||||
|
||||
detector := &PIIDetector{
|
||||
compiledRules: make(map[string]*regexp.Regexp),
|
||||
}
|
||||
|
||||
matches := detector.findKeywordMatches(tt.text, rule)
|
||||
|
||||
if len(matches) != tt.expected {
|
||||
t.Errorf("Expected %d matches, got %d for text: %s", tt.expected, len(matches), tt.text)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPIIDetector_Redaction(t *testing.T) {
|
||||
detector := &PIIDetector{
|
||||
compiledRules: make(map[string]*regexp.Regexp),
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
matches []PIIMatch
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
"single redaction",
|
||||
"Email: test@example.com",
|
||||
[]PIIMatch{{StartIndex: 7, EndIndex: 23, Severity: PIISeverityBlock}},
|
||||
"Email: ****************",
|
||||
},
|
||||
{
|
||||
"no matches",
|
||||
"Plain text",
|
||||
[]PIIMatch{},
|
||||
"Plain text",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := detector.RedactText(tt.text, tt.matches)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected '%s', got '%s'", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompareSeverity(t *testing.T) {
|
||||
tests := []struct {
|
||||
a, b PIISeverity
|
||||
expected int
|
||||
}{
|
||||
{PIISeverityBlock, PIISeverityWarn, 1},
|
||||
{PIISeverityWarn, PIISeverityBlock, -1},
|
||||
{PIISeverityBlock, PIISeverityBlock, 0},
|
||||
{PIISeverityRedact, PIISeverityWarn, 1},
|
||||
{PIISeverityRedact, PIISeverityBlock, -1},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(string(tt.a)+"_vs_"+string(tt.b), func(t *testing.T) {
|
||||
result := compareSeverity(tt.a, tt.b)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %d, got %d for %s vs %s", tt.expected, result, tt.a, tt.b)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// ENFORCER TESTS
|
||||
// =============================================================================
|
||||
|
||||
func TestExtractDomain(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
expected string
|
||||
hasError bool
|
||||
}{
|
||||
{"full URL", "https://www.example.com/path", "example.com", false},
|
||||
{"with port", "http://example.com:8080/path", "example.com", false},
|
||||
{"subdomain", "https://sub.domain.example.com", "sub.domain.example.com", false},
|
||||
{"no scheme", "example.com/path", "example.com", false},
|
||||
{"www prefix", "https://www.test.de", "test.de", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := extractDomain(tt.url)
|
||||
if tt.hasError && err == nil {
|
||||
t.Error("Expected error, got nil")
|
||||
}
|
||||
if !tt.hasError && err != nil {
|
||||
t.Errorf("Expected no error, got %v", err)
|
||||
}
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected '%s', got '%s'", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// YAML LOADER TESTS
|
||||
// =============================================================================
|
||||
|
||||
func TestParseYAML(t *testing.T) {
|
||||
yamlData := `
|
||||
federal:
|
||||
name: "Test Federal"
|
||||
sources:
|
||||
- domain: "test.gov"
|
||||
name: "Test Source"
|
||||
license: "§5 UrhG"
|
||||
trust_boost: 0.9
|
||||
|
||||
NI:
|
||||
name: "Niedersachsen"
|
||||
sources:
|
||||
- domain: "ni.gov"
|
||||
name: "NI Source"
|
||||
license: "DL-DE-BY-2.0"
|
||||
|
||||
default_operations:
|
||||
lookup:
|
||||
allowed: true
|
||||
requires_citation: true
|
||||
training:
|
||||
allowed: false
|
||||
requires_citation: false
|
||||
|
||||
pii_rules:
|
||||
- name: "Test Rule"
|
||||
type: "regex"
|
||||
pattern: "test.*pattern"
|
||||
severity: "block"
|
||||
`
|
||||
|
||||
config, err := ParseYAML([]byte(yamlData))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse YAML: %v", err)
|
||||
}
|
||||
|
||||
// Test federal
|
||||
if config.Federal.Name != "Test Federal" {
|
||||
t.Errorf("Expected federal name 'Test Federal', got '%s'", config.Federal.Name)
|
||||
}
|
||||
if len(config.Federal.Sources) != 1 {
|
||||
t.Errorf("Expected 1 federal source, got %d", len(config.Federal.Sources))
|
||||
}
|
||||
if config.Federal.Sources[0].Domain != "test.gov" {
|
||||
t.Errorf("Expected domain 'test.gov', got '%s'", config.Federal.Sources[0].Domain)
|
||||
}
|
||||
if config.Federal.Sources[0].TrustBoost != 0.9 {
|
||||
t.Errorf("Expected trust_boost 0.9, got %f", config.Federal.Sources[0].TrustBoost)
|
||||
}
|
||||
|
||||
// Test Bundesland
|
||||
if len(config.Bundeslaender) != 1 {
|
||||
t.Errorf("Expected 1 Bundesland, got %d", len(config.Bundeslaender))
|
||||
}
|
||||
ni, ok := config.Bundeslaender["NI"]
|
||||
if !ok {
|
||||
t.Error("Expected NI in Bundeslaender")
|
||||
}
|
||||
if ni.Name != "Niedersachsen" {
|
||||
t.Errorf("Expected name 'Niedersachsen', got '%s'", ni.Name)
|
||||
}
|
||||
|
||||
// Test operations
|
||||
if !config.DefaultOperations.Lookup.Allowed {
|
||||
t.Error("Expected lookup to be allowed")
|
||||
}
|
||||
if config.DefaultOperations.Training.Allowed {
|
||||
t.Error("Expected training to be NOT allowed")
|
||||
}
|
||||
|
||||
// Test PII rules
|
||||
if len(config.PIIRules) != 1 {
|
||||
t.Errorf("Expected 1 PII rule, got %d", len(config.PIIRules))
|
||||
}
|
||||
if config.PIIRules[0].Name != "Test Rule" {
|
||||
t.Errorf("Expected rule name 'Test Rule', got '%s'", config.PIIRules[0].Name)
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// AUDIT TESTS
|
||||
// =============================================================================
|
||||
|
||||
func TestMaskPII(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"short", "ab", "****"},
|
||||
{"medium", "test@email.com", "te****om"},
|
||||
{"long", "very-long-email@example.com", "ve****om"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := maskPII(tt.input)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected '%s', got '%s'", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// DEFAULT PII RULES TEST
|
||||
// =============================================================================
|
||||
|
||||
func TestDefaultPIIRules(t *testing.T) {
|
||||
rules := DefaultPIIRules()
|
||||
|
||||
if len(rules) == 0 {
|
||||
t.Error("Expected default PII rules, got none")
|
||||
}
|
||||
|
||||
// Check that each rule has required fields
|
||||
for _, rule := range rules {
|
||||
if rule.Name == "" {
|
||||
t.Error("Rule name should not be empty")
|
||||
}
|
||||
if rule.Type == "" {
|
||||
t.Error("Rule type should not be empty")
|
||||
}
|
||||
if rule.Pattern == "" {
|
||||
t.Error("Rule pattern should not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
// Check for email rule
|
||||
hasEmailRule := false
|
||||
for _, rule := range rules {
|
||||
if rule.Name == "Email Addresses" {
|
||||
hasEmailRule = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasEmailRule {
|
||||
t.Error("Expected email addresses rule in defaults")
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// INTEGRATION TEST HELPERS
|
||||
// =============================================================================
|
||||
|
||||
// TestFilteredURL tests the FilteredURL struct.
|
||||
func TestFilteredURL(t *testing.T) {
|
||||
fu := FilteredURL{
|
||||
URL: "https://example.com",
|
||||
IsAllowed: true,
|
||||
RequiresCitation: true,
|
||||
}
|
||||
|
||||
if fu.URL != "https://example.com" {
|
||||
t.Error("URL not set correctly")
|
||||
}
|
||||
if !fu.IsAllowed {
|
||||
t.Error("IsAllowed should be true")
|
||||
}
|
||||
if !fu.RequiresCitation {
|
||||
t.Error("RequiresCitation should be true")
|
||||
}
|
||||
}
|
||||
|
||||
1168
edu-search-service/internal/policy/store.go
Normal file
1168
edu-search-service/internal/policy/store.go
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user