feat: edu-search-service migriert, voice-service/geo-service entfernt
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
281
edu-search-service/internal/policy/enforcer.go
Normal file
281
edu-search-service/internal/policy/enforcer.go
Normal file
@@ -0,0 +1,281 @@
|
||||
package policy
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// Enforcer provides policy enforcement for the crawler and pipeline.
|
||||
type Enforcer struct {
|
||||
store *Store
|
||||
piiDetector *PIIDetector
|
||||
auditor *Auditor
|
||||
}
|
||||
|
||||
// NewEnforcer creates a new Enforcer instance.
|
||||
func NewEnforcer(store *Store) *Enforcer {
|
||||
return &Enforcer{
|
||||
store: store,
|
||||
piiDetector: NewPIIDetector(store),
|
||||
auditor: NewAuditor(store),
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// SOURCE CHECKING
|
||||
// =============================================================================
|
||||
|
||||
// CheckSource verifies if a URL is allowed based on the whitelist.
|
||||
// Returns the AllowedSource if found, nil if not whitelisted.
|
||||
func (e *Enforcer) CheckSource(ctx context.Context, rawURL string, bundesland *Bundesland) (*AllowedSource, error) {
|
||||
domain, err := extractDomain(rawURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
source, err := e.store.GetSourceByDomain(ctx, domain, bundesland)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return source, nil
|
||||
}
|
||||
|
||||
// CheckOperation verifies if a specific operation is allowed for a source.
|
||||
func (e *Enforcer) CheckOperation(ctx context.Context, source *AllowedSource, operation Operation) (*OperationPermission, error) {
|
||||
for _, op := range source.Operations {
|
||||
if op.Operation == operation {
|
||||
return &op, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If not found in loaded operations, query directly
|
||||
ops, err := e.store.GetOperationsBySourceID(ctx, source.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, op := range ops {
|
||||
if op.Operation == operation {
|
||||
return &op, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// CheckCompliance performs a full compliance check for a URL and operation.
|
||||
func (e *Enforcer) CheckCompliance(ctx context.Context, req *CheckComplianceRequest) (*CheckComplianceResponse, error) {
|
||||
response := &CheckComplianceResponse{
|
||||
IsAllowed: false,
|
||||
RequiresCitation: false,
|
||||
}
|
||||
|
||||
// Check if source is whitelisted
|
||||
source, err := e.CheckSource(ctx, req.URL, req.Bundesland)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if source == nil {
|
||||
reason := BlockReasonNotWhitelisted
|
||||
response.BlockReason = &reason
|
||||
return response, nil
|
||||
}
|
||||
|
||||
response.Source = source
|
||||
response.License = &source.License
|
||||
response.CitationTemplate = source.CitationTemplate
|
||||
|
||||
// Check operation permission
|
||||
opPerm, err := e.CheckOperation(ctx, source, req.Operation)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if opPerm == nil || !opPerm.IsAllowed {
|
||||
var reason BlockReason
|
||||
if req.Operation == OperationTraining {
|
||||
reason = BlockReasonTrainingForbidden
|
||||
} else {
|
||||
reason = BlockReasonLicenseViolation
|
||||
}
|
||||
response.BlockReason = &reason
|
||||
return response, nil
|
||||
}
|
||||
|
||||
response.IsAllowed = true
|
||||
response.RequiresCitation = opPerm.RequiresCitation
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// PII CHECKING
|
||||
// =============================================================================
|
||||
|
||||
// DetectPII scans text for PII patterns and returns matches.
|
||||
func (e *Enforcer) DetectPII(ctx context.Context, text string) (*PIITestResponse, error) {
|
||||
return e.piiDetector.Detect(ctx, text)
|
||||
}
|
||||
|
||||
// ShouldBlockForPII determines if content should be blocked based on PII matches.
|
||||
func (e *Enforcer) ShouldBlockForPII(response *PIITestResponse) bool {
|
||||
if response == nil {
|
||||
return false
|
||||
}
|
||||
return response.ShouldBlock
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// LOGGING
|
||||
// =============================================================================
|
||||
|
||||
// LogBlocked logs a blocked URL to the blocked content log.
|
||||
func (e *Enforcer) LogBlocked(ctx context.Context, rawURL string, reason BlockReason, ruleID *uuid.UUID, details map[string]interface{}) error {
|
||||
domain, _ := extractDomain(rawURL)
|
||||
return e.auditor.LogBlocked(ctx, rawURL, domain, reason, ruleID, details)
|
||||
}
|
||||
|
||||
// LogChange logs a policy change to the audit log.
|
||||
func (e *Enforcer) LogChange(ctx context.Context, action AuditAction, entityType AuditEntityType, entityID *uuid.UUID, oldValue, newValue interface{}, userEmail *string) error {
|
||||
return e.auditor.LogChange(ctx, action, entityType, entityID, oldValue, newValue, userEmail, nil, nil)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// BATCH OPERATIONS
|
||||
// =============================================================================
|
||||
|
||||
// FilterURLs filters a list of URLs, returning only whitelisted ones.
|
||||
func (e *Enforcer) FilterURLs(ctx context.Context, urls []string, bundesland *Bundesland, operation Operation) ([]FilteredURL, error) {
|
||||
results := make([]FilteredURL, 0, len(urls))
|
||||
|
||||
for _, u := range urls {
|
||||
result := FilteredURL{
|
||||
URL: u,
|
||||
IsAllowed: false,
|
||||
}
|
||||
|
||||
source, err := e.CheckSource(ctx, u, bundesland)
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
if source == nil {
|
||||
result.BlockReason = BlockReasonNotWhitelisted
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
opPerm, err := e.CheckOperation(ctx, source, operation)
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
if opPerm == nil || !opPerm.IsAllowed {
|
||||
if operation == OperationTraining {
|
||||
result.BlockReason = BlockReasonTrainingForbidden
|
||||
} else {
|
||||
result.BlockReason = BlockReasonLicenseViolation
|
||||
}
|
||||
results = append(results, result)
|
||||
continue
|
||||
}
|
||||
|
||||
result.IsAllowed = true
|
||||
result.Source = source
|
||||
result.RequiresCitation = opPerm.RequiresCitation
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// FilteredURL represents the result of filtering a single URL.
|
||||
type FilteredURL struct {
|
||||
URL string `json:"url"`
|
||||
IsAllowed bool `json:"is_allowed"`
|
||||
Source *AllowedSource `json:"source,omitempty"`
|
||||
BlockReason BlockReason `json:"block_reason,omitempty"`
|
||||
RequiresCitation bool `json:"requires_citation"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// HELPERS
|
||||
// =============================================================================
|
||||
|
||||
// extractDomain returns the normalized host name of rawURL.
//
// Input without a scheme is treated as https. This covers both bare hosts
// ("example.com/path") and scheme-relative references ("//example.com/path").
// The port is dropped, the host is lowercased (host names are
// case-insensitive, so "WWW.Example.COM" and "www.example.com" must map to
// the same whitelist key), and a leading "www." is removed.
//
// An error is returned only when url.Parse rejects the input. Input that
// parses but carries no host yields "" with a nil error.
func extractDomain(rawURL string) (string, error) {
	switch {
	case strings.HasPrefix(rawURL, "//"):
		// Scheme-relative reference: only the scheme is missing. Prepending
		// "https://" here would produce "https:////host" and an empty host.
		rawURL = "https:" + rawURL
	case !strings.Contains(rawURL, "://"):
		rawURL = "https://" + rawURL
	}

	parsed, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}

	// Lowercase before trimming so that "WWW." is stripped as well.
	host := strings.ToLower(parsed.Hostname())
	return strings.TrimPrefix(host, "www."), nil
}
|
||||
|
||||
// IsTrainingAllowed checks if training is allowed for any source (should always be false).
|
||||
func (e *Enforcer) IsTrainingAllowed(ctx context.Context) (bool, error) {
|
||||
// Training should NEVER be allowed - this is a safeguard
|
||||
matrix, err := e.store.GetOperationsMatrix(ctx)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, source := range matrix {
|
||||
for _, op := range source.Operations {
|
||||
if op.Operation == OperationTraining && op.IsAllowed {
|
||||
// This should never happen - log a warning
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// GetSourceByURL is a convenience method to get a source by URL.
|
||||
func (e *Enforcer) GetSourceByURL(ctx context.Context, rawURL string, bundesland *Bundesland) (*AllowedSource, error) {
|
||||
return e.CheckSource(ctx, rawURL, bundesland)
|
||||
}
|
||||
|
||||
// GetCitationForURL generates a citation for a URL if required.
|
||||
func (e *Enforcer) GetCitationForURL(ctx context.Context, rawURL string, bundesland *Bundesland, title string, date string) (string, error) {
|
||||
source, err := e.CheckSource(ctx, rawURL, bundesland)
|
||||
if err != nil || source == nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if source.CitationTemplate == nil || *source.CitationTemplate == "" {
|
||||
// Default citation format
|
||||
return "Quelle: " + source.Name + ", " + title + ", " + date, nil
|
||||
}
|
||||
|
||||
// Replace placeholders in template
|
||||
citation := *source.CitationTemplate
|
||||
citation = strings.ReplaceAll(citation, "{title}", title)
|
||||
citation = strings.ReplaceAll(citation, "{date}", date)
|
||||
citation = strings.ReplaceAll(citation, "{url}", rawURL)
|
||||
citation = strings.ReplaceAll(citation, "{domain}", source.Domain)
|
||||
citation = strings.ReplaceAll(citation, "{source}", source.Name)
|
||||
|
||||
return citation, nil
|
||||
}
|
||||
Reference in New Issue
Block a user