fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
276
ai-compliance-sdk/internal/llm/pii_detector.go
Normal file
276
ai-compliance-sdk/internal/llm/pii_detector.go
Normal file
@@ -0,0 +1,276 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/breakpilot/ai-compliance-sdk/internal/rbac"
|
||||
)
|
||||
|
||||
// PIIType identifies a category of personally identifiable information.
// FindPII reports the underlying string value in PIIFinding.Type.
type PIIType string
|
||||
|
||||
// Known PII categories. The string values are the identifiers reported in
// PIIFinding.Type.
const (
	PIITypeEmail PIIType = "email"
	PIITypePhone PIIType = "phone"
	PIITypeIPv4  PIIType = "ip_v4"
	PIITypeIPv6  PIIType = "ip_v6"
	PIITypeIBAN  PIIType = "iban"
	PIITypeUUID  PIIType = "uuid"
	PIITypeName  PIIType = "name"
	// NOTE(review): no pattern for PIITypeSocialSec is registered in
	// DefaultPIIPatterns or AllPIIPatterns in this file.
	PIITypeSocialSec   PIIType = "social_security"
	PIITypeCreditCard  PIIType = "credit_card"
	PIITypeDateOfBirth PIIType = "date_of_birth"
	PIITypeSalary      PIIType = "salary"
	PIITypeAddress     PIIType = "address"
)
|
||||
|
||||
// PIIPattern couples a regular expression with the PII category it detects,
// the placeholder that replaces a match, and the redaction level from which
// the pattern is applied.
type PIIPattern struct {
	// Type is the PII category reported for matches of Pattern.
	Type PIIType
	// Pattern is the compiled expression used to find occurrences in text.
	Pattern *regexp.Regexp
	// Replacement is passed to Pattern.ReplaceAllString when redacting.
	Replacement string
	Level       rbac.PIIRedactionLevel // Minimum level at which this is redacted
}
|
||||
|
||||
// PIIFinding describes a single PII occurrence located by FindPII.
type PIIFinding struct {
	// Type is the string form of the matching pattern's PIIType.
	Type string `json:"type"`
	// Match is the matched substring, i.e. text[Start:End].
	Match string `json:"match"`
	// Start is the index in the scanned text where the match begins.
	Start int `json:"start"`
	// End is the index just past the last matched byte (exclusive).
	End int `json:"end"`
}
|
||||
|
||||
// PIIDetector detects and redacts personally identifiable information using
// an ordered list of patterns.
type PIIDetector struct {
	// patterns is evaluated in slice order by FindPII, ContainsPII and Redact.
	patterns []*PIIPattern
}
|
||||
|
||||
// Pre-compiled patterns for common PII types. They are compiled once at
// package initialisation so no expression is rebuilt per call.
//
// Go's \b is an ASCII-only word boundary: it only holds between an ASCII word
// character ([0-9A-Za-z_]) and anything else. Patterns that place \b next to
// punctuation or non-ASCII letters therefore need care (see phonePattern and
// namePattern).
var (
	// emailPattern matches addresses of the form local@domain.tld.
	emailPattern = regexp.MustCompile(`\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b`)

	// ipv4Pattern matches dotted-quad addresses with each octet in 0-255.
	ipv4Pattern = regexp.MustCompile(`\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b`)

	// ipv6Pattern matches the full, uncompressed eight-group form only;
	// abbreviated addresses containing "::" are not covered.
	ipv6Pattern = regexp.MustCompile(`\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b`)

	// phonePattern matches, in order of preference:
	//   1. German numbers with a +49 / 0049 prefix,
	//   2. national numbers starting with 0,
	//   3. other international numbers starting with "+".
	// The third alternative deliberately has no leading \b: "+" is not a word
	// character, so a \b in front of it would only hold when a letter or digit
	// directly precedes the "+", which excluded ordinary numbers written after
	// a space or at the start of the text.
	phonePattern = regexp.MustCompile(`(?:\+49|0049)[\s.-]?\d{2,4}[\s.-]?\d{3,8}|\b0\d{2,4}[\s.-]?\d{3,8}\b|\+\d{1,3}[\s.-]?\d{2,4}[\s.-]?\d{3,8}\b`)

	// ibanPattern matches a two-letter country code, two check digits and
	// four to six digit groups, optionally separated by whitespace.
	ibanPattern = regexp.MustCompile(`(?i)\b[A-Z]{2}\d{2}[\s]?(?:\d{4}[\s]?){3,5}\d{1,4}\b`)

	// uuidPattern matches the canonical 8-4-4-4-12 hexadecimal form.
	uuidPattern = regexp.MustCompile(`(?i)\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b`)

	// namePattern matches a salutation followed by one or two capitalised
	// words. There is no trailing \b: after a name ending in a non-ASCII
	// letter (e.g. "Weiß") the ASCII-only boundary fails and the engine would
	// backtrack to a shorter match ("Herr Wei"), leaving part of the name
	// unredacted. The greedy letter classes already consume the whole word.
	namePattern = regexp.MustCompile(`\b(?:Herr|Frau|Hr\.|Fr\.|Mr\.|Mrs\.|Ms\.)\s+[A-ZÄÖÜ][a-zäöüß]+(?:\s+[A-ZÄÖÜ][a-zäöüß]+)?`)

	// creditCardPattern matches 16 digits in four groups of four, optionally
	// separated by whitespace or hyphens.
	creditCardPattern = regexp.MustCompile(`\b(?:\d{4}[\s-]?){3}\d{4}\b`)

	// dobPattern matches DD.MM.YYYY dates with a year in 1900-2099. Any such
	// date matches; the pattern cannot tell a birth date from other dates.
	dobPattern = regexp.MustCompile(`\b(?:0[1-9]|[12][0-9]|3[01])\.(?:0[1-9]|1[012])\.(?:19|20)\d{2}\b`)

	// salaryPattern matches a salary keyword followed by an amount with an
	// optional currency marker before or after it.
	salaryPattern = regexp.MustCompile(`(?i)(?:gehalt|salary|lohn|vergütung|einkommen)[:\s]+(?:€|EUR|USD|\$)?\s*[\d.,]+(?:\s*(?:€|EUR|USD|\$))?`)

	// addressPattern matches a street keyword followed by a house number.
	// NOTE(review): the leading \b requires the keyword to start a word, so
	// compound street names such as "Hauptstraße 5" are not matched.
	addressPattern = regexp.MustCompile(`(?i)\b(?:str\.|straße|strasse|weg|platz|allee)\s+\d+[a-z]?\b`)
)
|
||||
|
||||
// NewPIIDetector creates a new PII detector with default patterns
|
||||
func NewPIIDetector() *PIIDetector {
|
||||
return &PIIDetector{
|
||||
patterns: DefaultPIIPatterns(),
|
||||
}
|
||||
}
|
||||
|
||||
// NewPIIDetectorWithPatterns creates a new PII detector with custom patterns
|
||||
func NewPIIDetectorWithPatterns(patterns []*PIIPattern) *PIIDetector {
|
||||
return &PIIDetector{
|
||||
patterns: patterns,
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultPIIPatterns returns the default set of PII patterns
|
||||
func DefaultPIIPatterns() []*PIIPattern {
|
||||
return []*PIIPattern{
|
||||
{Type: PIITypeEmail, Pattern: emailPattern, Replacement: "[EMAIL_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
{Type: PIITypeIPv4, Pattern: ipv4Pattern, Replacement: "[IP_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
{Type: PIITypeIPv6, Pattern: ipv6Pattern, Replacement: "[IP_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
{Type: PIITypePhone, Pattern: phonePattern, Replacement: "[PHONE_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
}
|
||||
}
|
||||
|
||||
// AllPIIPatterns returns all available PII patterns
|
||||
func AllPIIPatterns() []*PIIPattern {
|
||||
return []*PIIPattern{
|
||||
{Type: PIITypeEmail, Pattern: emailPattern, Replacement: "[EMAIL_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
{Type: PIITypeIPv4, Pattern: ipv4Pattern, Replacement: "[IP_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
{Type: PIITypeIPv6, Pattern: ipv6Pattern, Replacement: "[IP_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
{Type: PIITypePhone, Pattern: phonePattern, Replacement: "[PHONE_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
{Type: PIITypeIBAN, Pattern: ibanPattern, Replacement: "[IBAN_REDACTED]", Level: rbac.PIIRedactionModerate},
|
||||
{Type: PIITypeUUID, Pattern: uuidPattern, Replacement: "[UUID_REDACTED]", Level: rbac.PIIRedactionStrict},
|
||||
{Type: PIITypeName, Pattern: namePattern, Replacement: "[NAME_REDACTED]", Level: rbac.PIIRedactionModerate},
|
||||
{Type: PIITypeCreditCard, Pattern: creditCardPattern, Replacement: "[CARD_REDACTED]", Level: rbac.PIIRedactionMinimal},
|
||||
{Type: PIITypeDateOfBirth, Pattern: dobPattern, Replacement: "[DOB_REDACTED]", Level: rbac.PIIRedactionModerate},
|
||||
{Type: PIITypeSalary, Pattern: salaryPattern, Replacement: "[SALARY_REDACTED]", Level: rbac.PIIRedactionStrict},
|
||||
{Type: PIITypeAddress, Pattern: addressPattern, Replacement: "[ADDRESS_REDACTED]", Level: rbac.PIIRedactionModerate},
|
||||
}
|
||||
}
|
||||
|
||||
// FindPII finds all PII in the text
|
||||
func (d *PIIDetector) FindPII(text string) []PIIFinding {
|
||||
if text == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var findings []PIIFinding
|
||||
for _, pattern := range d.patterns {
|
||||
matches := pattern.Pattern.FindAllStringIndex(text, -1)
|
||||
for _, match := range matches {
|
||||
findings = append(findings, PIIFinding{
|
||||
Type: string(pattern.Type),
|
||||
Match: text[match[0]:match[1]],
|
||||
Start: match[0],
|
||||
End: match[1],
|
||||
})
|
||||
}
|
||||
}
|
||||
return findings
|
||||
}
|
||||
|
||||
// ContainsPII checks if the text contains any PII
|
||||
func (d *PIIDetector) ContainsPII(text string) bool {
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, pattern := range d.patterns {
|
||||
if pattern.Pattern.MatchString(text) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Redact removes PII from the given text based on redaction level
|
||||
func (d *PIIDetector) Redact(text string, level rbac.PIIRedactionLevel) string {
|
||||
if text == "" || level == rbac.PIIRedactionNone {
|
||||
return text
|
||||
}
|
||||
|
||||
result := text
|
||||
for _, pattern := range d.patterns {
|
||||
if d.shouldRedactAtLevel(pattern.Level, level) {
|
||||
result = pattern.Pattern.ReplaceAllString(result, pattern.Replacement)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// shouldRedactAtLevel determines if a pattern should be applied at the given level
|
||||
func (d *PIIDetector) shouldRedactAtLevel(patternLevel, requestedLevel rbac.PIIRedactionLevel) bool {
|
||||
levelOrder := map[rbac.PIIRedactionLevel]int{
|
||||
rbac.PIIRedactionNone: 0,
|
||||
rbac.PIIRedactionMinimal: 1,
|
||||
rbac.PIIRedactionModerate: 2,
|
||||
rbac.PIIRedactionStrict: 3,
|
||||
}
|
||||
|
||||
return levelOrder[requestedLevel] >= levelOrder[patternLevel]
|
||||
}
|
||||
|
||||
// RedactMap redacts PII from all string values in a map
|
||||
func (d *PIIDetector) RedactMap(data map[string]any, level rbac.PIIRedactionLevel) map[string]any {
|
||||
result := make(map[string]any)
|
||||
for key, value := range data {
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
result[key] = d.Redact(v, level)
|
||||
case map[string]any:
|
||||
result[key] = d.RedactMap(v, level)
|
||||
case []any:
|
||||
result[key] = d.redactSlice(v, level)
|
||||
default:
|
||||
result[key] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (d *PIIDetector) redactSlice(data []any, level rbac.PIIRedactionLevel) []any {
|
||||
result := make([]any, len(data))
|
||||
for i, value := range data {
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
result[i] = d.Redact(v, level)
|
||||
case map[string]any:
|
||||
result[i] = d.RedactMap(v, level)
|
||||
case []any:
|
||||
result[i] = d.redactSlice(v, level)
|
||||
default:
|
||||
result[i] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// SafeLogString creates a safe-to-log version of a string
|
||||
func (d *PIIDetector) SafeLogString(text string) string {
|
||||
return d.Redact(text, rbac.PIIRedactionStrict)
|
||||
}
|
||||
|
||||
// DetectDataCategories attempts to detect data categories in text
|
||||
func (d *PIIDetector) DetectDataCategories(text string) []string {
|
||||
if text == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var categories []string
|
||||
textLower := strings.ToLower(text)
|
||||
|
||||
// Salary detection
|
||||
if salaryPattern.MatchString(text) || strings.Contains(textLower, "gehalt") || strings.Contains(textLower, "salary") {
|
||||
categories = append(categories, "salary")
|
||||
}
|
||||
|
||||
// Health detection
|
||||
healthKeywords := []string{"diagnose", "krankheit", "medikament", "therapie", "arzt", "krankenhaus",
|
||||
"health", "medical", "diagnosis", "treatment", "hospital"}
|
||||
for _, kw := range healthKeywords {
|
||||
if strings.Contains(textLower, kw) {
|
||||
categories = append(categories, "health")
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Financial detection
|
||||
if ibanPattern.MatchString(text) || creditCardPattern.MatchString(text) ||
|
||||
strings.Contains(textLower, "konto") || strings.Contains(textLower, "bank") {
|
||||
categories = append(categories, "financial")
|
||||
}
|
||||
|
||||
// Personal detection (names, addresses, DOB)
|
||||
if namePattern.MatchString(text) || addressPattern.MatchString(text) || dobPattern.MatchString(text) {
|
||||
categories = append(categories, "personal")
|
||||
}
|
||||
|
||||
// HR detection
|
||||
hrKeywords := []string{"mitarbeiter", "employee", "kündigung", "termination", "beförderung", "promotion",
|
||||
"leistungsbeurteilung", "performance review", "personalakte"}
|
||||
for _, kw := range hrKeywords {
|
||||
if strings.Contains(textLower, kw) {
|
||||
categories = append(categories, "hr")
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return categories
|
||||
}
|
||||
|
||||
// Global default detector
|
||||
var defaultDetector = NewPIIDetectorWithPatterns(AllPIIPatterns())
|
||||
|
||||
// RedactPII is a convenience function using the default detector
|
||||
func RedactPII(text string, level rbac.PIIRedactionLevel) string {
|
||||
return defaultDetector.Redact(text, level)
|
||||
}
|
||||
|
||||
// ContainsPIIDefault checks if text contains PII using default patterns
|
||||
func ContainsPIIDefault(text string) bool {
|
||||
return defaultDetector.ContainsPII(text)
|
||||
}
|
||||
|
||||
// FindPIIDefault finds PII using default patterns
|
||||
func FindPIIDefault(text string) []PIIFinding {
|
||||
return defaultDetector.FindPII(text)
|
||||
}
|
||||
|
||||
// DetectDataCategoriesDefault detects data categories using default detector
|
||||
func DetectDataCategoriesDefault(text string) []string {
|
||||
return defaultDetector.DetectDataCategories(text)
|
||||
}
|
||||
Reference in New Issue
Block a user