Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
All services: admin-v2, studio-v2, website, ai-compliance-sdk, consent-service, klausur-service, voice-service, and infrastructure. Large PDFs and compiled binaries excluded via .gitignore.
282 lines
8.1 KiB
Go
282 lines
8.1 KiB
Go
package policy
|
|
|
|
import (
|
|
"context"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// Enforcer provides policy enforcement for the crawler and pipeline.
|
|
type Enforcer struct {
|
|
store *Store
|
|
piiDetector *PIIDetector
|
|
auditor *Auditor
|
|
}
|
|
|
|
// NewEnforcer creates a new Enforcer instance.
|
|
func NewEnforcer(store *Store) *Enforcer {
|
|
return &Enforcer{
|
|
store: store,
|
|
piiDetector: NewPIIDetector(store),
|
|
auditor: NewAuditor(store),
|
|
}
|
|
}
|
|
|
|
// =============================================================================
|
|
// SOURCE CHECKING
|
|
// =============================================================================
|
|
|
|
// CheckSource verifies if a URL is allowed based on the whitelist.
|
|
// Returns the AllowedSource if found, nil if not whitelisted.
|
|
func (e *Enforcer) CheckSource(ctx context.Context, rawURL string, bundesland *Bundesland) (*AllowedSource, error) {
|
|
domain, err := extractDomain(rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
source, err := e.store.GetSourceByDomain(ctx, domain, bundesland)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return source, nil
|
|
}
|
|
|
|
// CheckOperation verifies if a specific operation is allowed for a source.
|
|
func (e *Enforcer) CheckOperation(ctx context.Context, source *AllowedSource, operation Operation) (*OperationPermission, error) {
|
|
for _, op := range source.Operations {
|
|
if op.Operation == operation {
|
|
return &op, nil
|
|
}
|
|
}
|
|
|
|
// If not found in loaded operations, query directly
|
|
ops, err := e.store.GetOperationsBySourceID(ctx, source.ID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for _, op := range ops {
|
|
if op.Operation == operation {
|
|
return &op, nil
|
|
}
|
|
}
|
|
|
|
return nil, nil
|
|
}
|
|
|
|
// CheckCompliance performs a full compliance check for a URL and operation.
|
|
func (e *Enforcer) CheckCompliance(ctx context.Context, req *CheckComplianceRequest) (*CheckComplianceResponse, error) {
|
|
response := &CheckComplianceResponse{
|
|
IsAllowed: false,
|
|
RequiresCitation: false,
|
|
}
|
|
|
|
// Check if source is whitelisted
|
|
source, err := e.CheckSource(ctx, req.URL, req.Bundesland)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if source == nil {
|
|
reason := BlockReasonNotWhitelisted
|
|
response.BlockReason = &reason
|
|
return response, nil
|
|
}
|
|
|
|
response.Source = source
|
|
response.License = &source.License
|
|
response.CitationTemplate = source.CitationTemplate
|
|
|
|
// Check operation permission
|
|
opPerm, err := e.CheckOperation(ctx, source, req.Operation)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if opPerm == nil || !opPerm.IsAllowed {
|
|
var reason BlockReason
|
|
if req.Operation == OperationTraining {
|
|
reason = BlockReasonTrainingForbidden
|
|
} else {
|
|
reason = BlockReasonLicenseViolation
|
|
}
|
|
response.BlockReason = &reason
|
|
return response, nil
|
|
}
|
|
|
|
response.IsAllowed = true
|
|
response.RequiresCitation = opPerm.RequiresCitation
|
|
|
|
return response, nil
|
|
}
|
|
|
|
// =============================================================================
|
|
// PII CHECKING
|
|
// =============================================================================
|
|
|
|
// DetectPII scans text for PII patterns and returns matches.
|
|
func (e *Enforcer) DetectPII(ctx context.Context, text string) (*PIITestResponse, error) {
|
|
return e.piiDetector.Detect(ctx, text)
|
|
}
|
|
|
|
// ShouldBlockForPII determines if content should be blocked based on PII matches.
|
|
func (e *Enforcer) ShouldBlockForPII(response *PIITestResponse) bool {
|
|
if response == nil {
|
|
return false
|
|
}
|
|
return response.ShouldBlock
|
|
}
|
|
|
|
// =============================================================================
|
|
// LOGGING
|
|
// =============================================================================
|
|
|
|
// LogBlocked logs a blocked URL to the blocked content log.
|
|
func (e *Enforcer) LogBlocked(ctx context.Context, rawURL string, reason BlockReason, ruleID *uuid.UUID, details map[string]interface{}) error {
|
|
domain, _ := extractDomain(rawURL)
|
|
return e.auditor.LogBlocked(ctx, rawURL, domain, reason, ruleID, details)
|
|
}
|
|
|
|
// LogChange logs a policy change to the audit log.
|
|
func (e *Enforcer) LogChange(ctx context.Context, action AuditAction, entityType AuditEntityType, entityID *uuid.UUID, oldValue, newValue interface{}, userEmail *string) error {
|
|
return e.auditor.LogChange(ctx, action, entityType, entityID, oldValue, newValue, userEmail, nil, nil)
|
|
}
|
|
|
|
// =============================================================================
|
|
// BATCH OPERATIONS
|
|
// =============================================================================
|
|
|
|
// FilterURLs filters a list of URLs, returning only whitelisted ones.
|
|
func (e *Enforcer) FilterURLs(ctx context.Context, urls []string, bundesland *Bundesland, operation Operation) ([]FilteredURL, error) {
|
|
results := make([]FilteredURL, 0, len(urls))
|
|
|
|
for _, u := range urls {
|
|
result := FilteredURL{
|
|
URL: u,
|
|
IsAllowed: false,
|
|
}
|
|
|
|
source, err := e.CheckSource(ctx, u, bundesland)
|
|
if err != nil {
|
|
result.Error = err.Error()
|
|
results = append(results, result)
|
|
continue
|
|
}
|
|
|
|
if source == nil {
|
|
result.BlockReason = BlockReasonNotWhitelisted
|
|
results = append(results, result)
|
|
continue
|
|
}
|
|
|
|
opPerm, err := e.CheckOperation(ctx, source, operation)
|
|
if err != nil {
|
|
result.Error = err.Error()
|
|
results = append(results, result)
|
|
continue
|
|
}
|
|
|
|
if opPerm == nil || !opPerm.IsAllowed {
|
|
if operation == OperationTraining {
|
|
result.BlockReason = BlockReasonTrainingForbidden
|
|
} else {
|
|
result.BlockReason = BlockReasonLicenseViolation
|
|
}
|
|
results = append(results, result)
|
|
continue
|
|
}
|
|
|
|
result.IsAllowed = true
|
|
result.Source = source
|
|
result.RequiresCitation = opPerm.RequiresCitation
|
|
results = append(results, result)
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// FilteredURL represents the result of filtering a single URL.
|
|
type FilteredURL struct {
|
|
URL string `json:"url"`
|
|
IsAllowed bool `json:"is_allowed"`
|
|
Source *AllowedSource `json:"source,omitempty"`
|
|
BlockReason BlockReason `json:"block_reason,omitempty"`
|
|
RequiresCitation bool `json:"requires_citation"`
|
|
Error string `json:"error,omitempty"`
|
|
}
|
|
|
|
// =============================================================================
|
|
// HELPERS
|
|
// =============================================================================
|
|
|
|
// extractDomain returns the normalized host name of rawURL.
//
// Input without a scheme ("example.com/page") and protocol-relative input
// ("//example.com/page") is parsed as if it were an https URL. The port is
// dropped, the host is lowercased (host names are case-insensitive) and a
// leading "www." label is removed.
//
// An error is returned only when url.Parse rejects the input; input that
// parses but has no host yields "" with a nil error.
func extractDomain(rawURL string) (string, error) {
	// A "://" only marks a scheme when it comes before any path, query or
	// fragment; otherwise it belongs to e.g. a redirect target inside the
	// query string of a scheme-less URL.
	schemeEnd := strings.Index(rawURL, "://")
	hasScheme := schemeEnd >= 0 && !strings.ContainsAny(rawURL[:schemeEnd], "/?#")

	switch {
	case strings.HasPrefix(rawURL, "//"):
		// Protocol-relative reference: only the scheme name is missing.
		rawURL = "https:" + rawURL
	case !hasScheme:
		rawURL = "https://" + rawURL
	}

	parsed, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}

	// Lowercase before trimming so that "WWW." is stripped as well.
	host := strings.ToLower(parsed.Hostname())
	return strings.TrimPrefix(host, "www."), nil
}
|
|
|
|
// IsTrainingAllowed checks if training is allowed for any source (should always be false).
|
|
func (e *Enforcer) IsTrainingAllowed(ctx context.Context) (bool, error) {
|
|
// Training should NEVER be allowed - this is a safeguard
|
|
matrix, err := e.store.GetOperationsMatrix(ctx)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
for _, source := range matrix {
|
|
for _, op := range source.Operations {
|
|
if op.Operation == OperationTraining && op.IsAllowed {
|
|
// This should never happen - log a warning
|
|
return true, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
// GetSourceByURL is a convenience method to get a source by URL.
|
|
func (e *Enforcer) GetSourceByURL(ctx context.Context, rawURL string, bundesland *Bundesland) (*AllowedSource, error) {
|
|
return e.CheckSource(ctx, rawURL, bundesland)
|
|
}
|
|
|
|
// GetCitationForURL generates a citation for a URL if required.
|
|
func (e *Enforcer) GetCitationForURL(ctx context.Context, rawURL string, bundesland *Bundesland, title string, date string) (string, error) {
|
|
source, err := e.CheckSource(ctx, rawURL, bundesland)
|
|
if err != nil || source == nil {
|
|
return "", err
|
|
}
|
|
|
|
if source.CitationTemplate == nil || *source.CitationTemplate == "" {
|
|
// Default citation format
|
|
return "Quelle: " + source.Name + ", " + title + ", " + date, nil
|
|
}
|
|
|
|
// Replace placeholders in template
|
|
citation := *source.CitationTemplate
|
|
citation = strings.ReplaceAll(citation, "{title}", title)
|
|
citation = strings.ReplaceAll(citation, "{date}", date)
|
|
citation = strings.ReplaceAll(citation, "{url}", rawURL)
|
|
citation = strings.ReplaceAll(citation, "{domain}", source.Domain)
|
|
citation = strings.ReplaceAll(citation, "{source}", source.Name)
|
|
|
|
return citation, nil
|
|
}
|