""" PII Redactor Redacts Personally Identifiable Information (PII) from logs and responses. Essential for DSGVO/GDPR compliance in BreakPilot. Redacted data types: - Email addresses - IP addresses - German phone numbers - Names (when identified) - Student IDs - Credit card numbers - IBAN numbers Usage: from middleware import PIIRedactor, redact_pii # Use in logging logger.info(redact_pii(f"User {email} logged in from {ip}")) # Configure redactor redactor = PIIRedactor(patterns=["email", "ip", "phone"]) safe_message = redactor.redact(sensitive_message) """ import re from dataclasses import dataclass, field from typing import Dict, List, Optional, Pattern, Set @dataclass class PIIPattern: """Definition of a PII pattern.""" name: str pattern: Pattern replacement: str # Pre-compiled regex patterns for common PII PII_PATTERNS: Dict[str, PIIPattern] = { "email": PIIPattern( name="email", pattern=re.compile( r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', re.IGNORECASE ), replacement="[EMAIL_REDACTED]", ), "ip_v4": PIIPattern( name="ip_v4", pattern=re.compile( r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b' ), replacement="[IP_REDACTED]", ), "ip_v6": PIIPattern( name="ip_v6", pattern=re.compile( r'\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b' ), replacement="[IP_REDACTED]", ), "phone_de": PIIPattern( name="phone_de", pattern=re.compile( r'(? str: """ Redact PII from the given text. Args: text: The text to redact PII from Returns: Text with PII replaced by redaction markers """ if not text: return text result = text for pattern in self._active_patterns: if self.preserve_format: # Replace with same-length placeholder def replace_preserve(match): length = len(match.group()) return "*" * length result = pattern.pattern.sub(replace_preserve, result) else: result = pattern.pattern.sub(pattern.replacement, result) return result def contains_pii(self, text: str) -> bool: """ Check if text contains any PII. Args: text: The text to check Returns: True if PII is detected """ if not text: return False for pattern in self._active_patterns: if pattern.pattern.search(text): return True return False def find_pii(self, text: str) -> List[Dict[str, str]]: """ Find all PII in text with their types. Args: text: The text to search Returns: List of dicts with 'type' and 'match' keys """ if not text: return [] findings = [] for pattern in self._active_patterns: for match in pattern.pattern.finditer(text): findings.append({ "type": pattern.name, "match": match.group(), "start": match.start(), "end": match.end(), }) return findings # Module-level default redactor instance _default_redactor: Optional[PIIRedactor] = None def get_default_redactor() -> PIIRedactor: """Get or create the default redactor instance.""" global _default_redactor if _default_redactor is None: _default_redactor = PIIRedactor() return _default_redactor def redact_pii(text: str) -> str: """ Convenience function to redact PII using the default redactor. Args: text: Text to redact Returns: Redacted text Example: logger.info(redact_pii(f"User {email} logged in")) """ return get_default_redactor().redact(text) class PIIRedactingLogFilter: """ Logging filter that automatically redacts PII from log messages. Usage: import logging handler = logging.StreamHandler() handler.addFilter(PIIRedactingLogFilter()) logger = logging.getLogger() logger.addHandler(handler) """ def __init__(self, redactor: Optional[PIIRedactor] = None): self.redactor = redactor or get_default_redactor() def filter(self, record): # Redact the message if record.msg: record.msg = self.redactor.redact(str(record.msg)) # Redact args if present if record.args: if isinstance(record.args, dict): record.args = { k: self.redactor.redact(str(v)) if isinstance(v, str) else v for k, v in record.args.items() } elif isinstance(record.args, tuple): record.args = tuple( self.redactor.redact(str(v)) if isinstance(v, str) else v for v in record.args ) return True def create_safe_dict(data: dict, redactor: Optional[PIIRedactor] = None) -> dict: """ Create a copy of a dictionary with PII redacted. Args: data: Dictionary to redact redactor: Optional custom redactor Returns: New dictionary with redacted values """ r = redactor or get_default_redactor() def redact_value(value): if isinstance(value, str): return r.redact(value) elif isinstance(value, dict): return create_safe_dict(value, r) elif isinstance(value, list): return [redact_value(v) for v in value] return value return {k: redact_value(v) for k, v in data.items()}