Files
breakpilot-lehrer/edu-search-service/internal/policy/enforcer.go
Benjamin Boenisch 414e0f5ec0
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
feat: edu-search-service migriert, voice-service/geo-service entfernt
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt
  (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 18:36:38 +01:00

282 lines
8.1 KiB
Go

package policy
import (
"context"
"net/url"
"strings"
"github.com/google/uuid"
)
// Enforcer provides policy enforcement for the crawler and pipeline.
// It bundles the policy store with a PII detector and an auditor; all of its
// methods delegate to one of these three collaborators.
type Enforcer struct {
// store is queried for whitelisted sources and their operation permissions.
store *Store
// piiDetector performs the text scan behind DetectPII.
piiDetector *PIIDetector
// auditor writes the entries produced by LogBlocked and LogChange.
auditor *Auditor
}
// NewEnforcer wires up an Enforcer around the given store.
// The PII detector and the auditor are both created from that same store.
func NewEnforcer(store *Store) *Enforcer {
	enforcer := new(Enforcer)
	enforcer.store = store
	enforcer.piiDetector = NewPIIDetector(store)
	enforcer.auditor = NewAuditor(store)
	return enforcer
}
// =============================================================================
// SOURCE CHECKING
// =============================================================================
// CheckSource resolves rawURL to its whitelisted source.
//
// The domain is extracted from rawURL and looked up in the store together
// with the optional bundesland. The result is whatever the store lookup
// yields (nil when the domain is not whitelisted). Any failure while parsing
// the URL or querying the store is returned with a nil source.
func (e *Enforcer) CheckSource(ctx context.Context, rawURL string, bundesland *Bundesland) (*AllowedSource, error) {
	host, parseErr := extractDomain(rawURL)
	if parseErr != nil {
		return nil, parseErr
	}

	match, lookupErr := e.store.GetSourceByDomain(ctx, host, bundesland)
	if lookupErr != nil {
		return nil, lookupErr
	}
	return match, nil
}
// CheckOperation looks up the permission entry for operation on source.
//
// The operations already attached to source are searched first; only when
// none of them matches is the store queried by source ID. The returned
// pointer refers to a copy of the matching entry. A nil permission with a nil
// error means no entry exists for the operation.
func (e *Enforcer) CheckOperation(ctx context.Context, source *AllowedSource, operation Operation) (*OperationPermission, error) {
	// Fast path: permissions that were loaded together with the source.
	for _, loaded := range source.Operations {
		if loaded.Operation != operation {
			continue
		}
		found := loaded
		return &found, nil
	}

	// Fallback: fetch the permissions for this source from the store.
	fetched, err := e.store.GetOperationsBySourceID(ctx, source.ID)
	if err != nil {
		return nil, err
	}
	for _, candidate := range fetched {
		if candidate.Operation != operation {
			continue
		}
		found := candidate
		return &found, nil
	}

	return nil, nil
}
// CheckCompliance runs the complete compliance check for req.URL and
// req.Operation.
//
// The response starts out as "not allowed". If the URL has no whitelisted
// source, the block reason is BlockReasonNotWhitelisted. If the operation has
// no permission entry or is not allowed, the block reason is
// BlockReasonTrainingForbidden for OperationTraining and
// BlockReasonLicenseViolation for everything else. Otherwise the response is
// marked allowed and carries the permission's citation requirement.
// Errors from the source or operation lookup are returned with a nil response.
func (e *Enforcer) CheckCompliance(ctx context.Context, req *CheckComplianceRequest) (*CheckComplianceResponse, error) {
	// Zero value: IsAllowed and RequiresCitation are both false.
	result := new(CheckComplianceResponse)

	src, err := e.CheckSource(ctx, req.URL, req.Bundesland)
	switch {
	case err != nil:
		return nil, err
	case src == nil:
		notListed := BlockReasonNotWhitelisted
		result.BlockReason = &notListed
		return result, nil
	}

	// Source metadata is reported even when the operation ends up blocked.
	result.Source = src
	result.License = &src.License
	result.CitationTemplate = src.CitationTemplate

	perm, err := e.CheckOperation(ctx, src, req.Operation)
	if err != nil {
		return nil, err
	}

	if perm != nil && perm.IsAllowed {
		result.IsAllowed = true
		result.RequiresCitation = perm.RequiresCitation
		return result, nil
	}

	// Missing or denied permission: pick the reason by operation.
	var denied BlockReason
	switch req.Operation {
	case OperationTraining:
		denied = BlockReasonTrainingForbidden
	default:
		denied = BlockReasonLicenseViolation
	}
	result.BlockReason = &denied
	return result, nil
}
// =============================================================================
// PII CHECKING
// =============================================================================
// DetectPII scans text for PII patterns and returns matches.
// It delegates to the Enforcer's PIIDetector and passes the detector's
// response and error through unchanged.
func (e *Enforcer) DetectPII(ctx context.Context, text string) (*PIITestResponse, error) {
return e.piiDetector.Detect(ctx, text)
}
// ShouldBlockForPII reports whether a PII scan result demands blocking.
// A nil response never blocks; otherwise the response's ShouldBlock flag
// decides.
func (e *Enforcer) ShouldBlockForPII(response *PIITestResponse) bool {
	return response != nil && response.ShouldBlock
}
// =============================================================================
// LOGGING
// =============================================================================
// LogBlocked records a blocked URL, together with its domain, the block
// reason, the optional rule ID and free-form details, via the auditor.
func (e *Enforcer) LogBlocked(ctx context.Context, rawURL string, reason BlockReason, ruleID *uuid.UUID, details map[string]interface{}) error {
	// The parse error is discarded on purpose of keeping the entry: when the
	// URL cannot be parsed, extractDomain yields an empty domain and the
	// blocked URL is still written to the log.
	host, _ := extractDomain(rawURL)
	return e.auditor.LogBlocked(ctx, rawURL, host, reason, ruleID, details)
}
// LogChange logs a policy change to the audit log.
// All arguments are forwarded unchanged to the auditor's LogChange.
// NOTE(review): the two trailing arguments of the auditor call are always
// passed as nil here; their meaning is not visible in this file — confirm
// against Auditor.LogChange.
func (e *Enforcer) LogChange(ctx context.Context, action AuditAction, entityType AuditEntityType, entityID *uuid.UUID, oldValue, newValue interface{}, userEmail *string) error {
return e.auditor.LogChange(ctx, action, entityType, entityID, oldValue, newValue, userEmail, nil, nil)
}
// =============================================================================
// BATCH OPERATIONS
// =============================================================================
// FilterURLs evaluates every URL in urls against the whitelist and the
// requested operation.
//
// Exactly one FilteredURL is produced per input URL, in input order. Lookup
// failures do not abort the batch: they are stored in the entry's Error
// field. Entries for URLs without a whitelisted source carry
// BlockReasonNotWhitelisted; entries whose operation is missing or denied
// carry BlockReasonTrainingForbidden (for OperationTraining) or
// BlockReasonLicenseViolation. The returned error is always nil.
func (e *Enforcer) FilterURLs(ctx context.Context, urls []string, bundesland *Bundesland, operation Operation) ([]FilteredURL, error) {
	// The denial reason depends only on the operation, so decide it once.
	var denied BlockReason
	switch operation {
	case OperationTraining:
		denied = BlockReasonTrainingForbidden
	default:
		denied = BlockReasonLicenseViolation
	}

	filtered := make([]FilteredURL, 0, len(urls))
	for _, candidate := range urls {
		// Every entry starts out as "not allowed".
		entry := FilteredURL{URL: candidate}

		src, srcErr := e.CheckSource(ctx, candidate, bundesland)
		switch {
		case srcErr != nil:
			entry.Error = srcErr.Error()
		case src == nil:
			entry.BlockReason = BlockReasonNotWhitelisted
		default:
			perm, permErr := e.CheckOperation(ctx, src, operation)
			switch {
			case permErr != nil:
				entry.Error = permErr.Error()
			case perm == nil || !perm.IsAllowed:
				entry.BlockReason = denied
			default:
				// Source details are only attached to allowed entries.
				entry.IsAllowed = true
				entry.Source = src
				entry.RequiresCitation = perm.RequiresCitation
			}
		}

		filtered = append(filtered, entry)
	}
	return filtered, nil
}
// FilteredURL represents the result of filtering a single URL.
// FilterURLs produces one value per input URL.
type FilteredURL struct {
// URL is the input URL exactly as it was passed to FilterURLs.
URL string `json:"url"`
// IsAllowed is true only when the source is whitelisted and the operation is permitted.
IsAllowed bool `json:"is_allowed"`
// Source is the matched whitelist entry; FilterURLs sets it only for allowed URLs.
Source *AllowedSource `json:"source,omitempty"`
// BlockReason states why the URL was rejected; it stays empty for allowed URLs and for lookup errors.
BlockReason BlockReason `json:"block_reason,omitempty"`
// RequiresCitation is copied from the matching operation permission of an allowed URL.
RequiresCitation bool `json:"requires_citation"`
// Error holds the message of a failed source or operation lookup.
Error string `json:"error,omitempty"`
}
// =============================================================================
// HELPERS
// =============================================================================
// extractDomain extracts the normalised domain from a URL.
//
// rawURL may be given with or without a scheme; "https://" is assumed when
// none is present. The returned host has no port, is lower-cased (host names
// are case-insensitive), and has a trailing root dot and a leading "www."
// removed, so that "HTTPS://WWW.Example.COM:443/x" and "example.com" map to
// the same domain. Surrounding whitespace in rawURL is ignored.
//
// An error is returned only when the URL cannot be parsed; a URL without a
// host yields an empty domain and a nil error.
func extractDomain(rawURL string) (string, error) {
	rawURL = strings.TrimSpace(rawURL)

	// A "://" only introduces a scheme when nothing in front of it already
	// belongs to the path, query or fragment. Otherwise a scheme-less input
	// such as "example.com/r?u=http://other.org" would be taken as having a
	// scheme and end up without a host.
	if i := strings.Index(rawURL, "://"); i < 0 || strings.ContainsAny(rawURL[:i], "/?#") {
		rawURL = "https://" + rawURL
	}

	parsed, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}

	// Lower-case before stripping "www." so that "WWW." is removed as well.
	host := strings.ToLower(parsed.Hostname())
	host = strings.TrimSuffix(host, ".")
	host = strings.TrimPrefix(host, "www.")
	return host, nil
}
// IsTrainingAllowed reports whether any source in the operations matrix has
// the training operation enabled. Training is never supposed to be allowed,
// so this acts as a safeguard and is expected to return false.
// A failure to load the matrix is returned together with false.
func (e *Enforcer) IsTrainingAllowed(ctx context.Context) (bool, error) {
	entries, err := e.store.GetOperationsMatrix(ctx)
	if err != nil {
		return false, err
	}

	for _, entry := range entries {
		for _, perm := range entry.Operations {
			if perm.Operation != OperationTraining || !perm.IsAllowed {
				continue
			}
			// An enabled training permission contradicts the policy; it is
			// surfaced to the caller through the return value.
			return true, nil
		}
	}
	return false, nil
}
// GetSourceByURL is a convenience method to get a source by URL.
// It is an alias for CheckSource: the arguments are forwarded unchanged and
// the source and error are returned as CheckSource produced them.
func (e *Enforcer) GetSourceByURL(ctx context.Context, rawURL string, bundesland *Bundesland) (*AllowedSource, error) {
return e.CheckSource(ctx, rawURL, bundesland)
}
// GetCitationForURL builds the citation text for a URL.
//
// The URL is resolved to its source with CheckSource. If that lookup fails,
// or no source matches, an empty string is returned together with the lookup
// error (nil when there simply is no matching source).
//
// A source without a citation template (nil or empty) gets the default form
// "Quelle: <source name>, <title>, <date>". Otherwise the placeholders
// {title}, {date}, {url}, {domain} and {source} in the template are replaced
// by title, date, rawURL, the source's domain and the source's name.
func (e *Enforcer) GetCitationForURL(ctx context.Context, rawURL string, bundesland *Bundesland, title string, date string) (string, error) {
	source, err := e.CheckSource(ctx, rawURL, bundesland)
	if err != nil || source == nil {
		return "", err
	}

	if source.CitationTemplate == nil || *source.CitationTemplate == "" {
		// Default citation format
		return "Quelle: " + source.Name + ", " + title + ", " + date, nil
	}

	// Substitute all placeholders in a single pass over the template. With
	// one replacement after another, placeholder-like text inside an already
	// inserted value (e.g. a title containing "{url}") would be expanded
	// again by a later step.
	placeholders := strings.NewReplacer(
		"{title}", title,
		"{date}", date,
		"{url}", rawURL,
		"{domain}", source.Domain,
		"{source}", source.Name,
	)
	return placeholders.Replace(*source.CitationTemplate), nil
}