"""
SQLAlchemy database models for the Alerts Agent.

Persists topics, alerts, rules and profiles in PostgreSQL.
Uses the same Base as classroom_engine for consistent migrations.
"""
import enum
import uuid
from datetime import datetime

from sqlalchemy import (
    Column, String, Integer, Float, DateTime, JSON,
    Boolean, Text, Enum as SQLEnum, ForeignKey, Index
)
from sqlalchemy.orm import relationship

# Import Base from classroom_engine for shared metadata
from classroom_engine.database import Base


class AlertSourceEnum(str, enum.Enum):
    """Origin of an alert."""

    GOOGLE_ALERTS_RSS = "google_alerts_rss"
    GOOGLE_ALERTS_EMAIL = "google_alerts_email"
    RSS_FEED = "rss_feed"
    WEBHOOK = "webhook"
    MANUAL = "manual"


class AlertStatusEnum(str, enum.Enum):
    """Processing status of an alert."""

    NEW = "new"
    PROCESSED = "processed"
    DUPLICATE = "duplicate"
    SCORED = "scored"
    REVIEWED = "reviewed"
    ARCHIVED = "archived"


class RelevanceDecisionEnum(str, enum.Enum):
    """Relevance decision."""

    KEEP = "KEEP"
    DROP = "DROP"
    REVIEW = "REVIEW"


class FeedTypeEnum(str, enum.Enum):
    """Type of the feed source."""

    RSS = "rss"
    EMAIL = "email"
    WEBHOOK = "webhook"


class RuleActionEnum(str, enum.Enum):
    """Actions available to rules."""

    KEEP = "keep"
    DROP = "drop"
    TAG = "tag"
    EMAIL = "email"
    WEBHOOK = "webhook"
    SLACK = "slack"


class ImportanceLevelEnum(str, enum.Enum):
    """Five-level importance scale for guided mode."""

    INFO = "info"            # 0.0-0.4 - informational
    PRUEFEN = "pruefen"      # 0.4-0.6 - to be checked
    WICHTIG = "wichtig"      # 0.6-0.75 - important
    DRINGEND = "dringend"    # 0.75-0.9 - urgent
    KRITISCH = "kritisch"    # 0.9-1.0 - critical


class AlertModeEnum(str, enum.Enum):
    """Mode in which alerts are used."""

    GUIDED = "guided"    # Guided mode for teachers / school management
    EXPERT = "expert"    # Expert mode for IT-savvy users


class MigrationModeEnum(str, enum.Enum):
    """How the alerts were migrated."""

    FORWARD = "forward"              # E-mail forwarding
    IMPORT = "import"                # RSS import
    RECONSTRUCTED = "reconstructed"  # Automatically reconstructed
class DigestStatusEnum(str, enum.Enum):
    """Status of digest generation."""

    PENDING = "pending"
    GENERATING = "generating"
    SENT = "sent"
    FAILED = "failed"


class UserRoleEnum(str, enum.Enum):
    """Role of the user, used for template recommendations."""

    LEHRKRAFT = "lehrkraft"
    SCHULLEITUNG = "schulleitung"
    IT_BEAUFTRAGTE = "it_beauftragte"


class AlertTopicDB(Base):
    """
    Alert topic / feed source.

    Represents a Google Alert configuration or an RSS feed.
    """

    __tablename__ = 'alert_topics'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), nullable=True, index=True)  # Optional: multi-user

    # Topic details
    name = Column(String(255), nullable=False)
    description = Column(Text, default="")

    # Feed configuration
    feed_url = Column(String(2000), nullable=True)
    feed_type = Column(
        SQLEnum(FeedTypeEnum),
        default=FeedTypeEnum.RSS,
        nullable=False
    )

    # Scheduling
    is_active = Column(Boolean, default=True, index=True)
    fetch_interval_minutes = Column(Integer, default=60)
    last_fetched_at = Column(DateTime, nullable=True)
    last_fetch_error = Column(Text, nullable=True)

    # Statistics
    total_items_fetched = Column(Integer, default=0)
    items_kept = Column(Integer, default=0)
    items_dropped = Column(Integer, default=0)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    alerts = relationship("AlertItemDB", back_populates="topic", cascade="all, delete-orphan")
    rules = relationship("AlertRuleDB", back_populates="topic", cascade="all, delete-orphan")

    def __repr__(self) -> str:
        """Return a short debug representation (id, name, feed type)."""
        # !r keeps the repr safe when attributes are still None/unset.
        return (
            f"<AlertTopicDB id={self.id!r} name={self.name!r} "
            f"feed_type={self.feed_type!r}>"
        )


class AlertItemDB(Base):
    """
    Single alert entry.

    Corresponds to one article/link from Google Alerts or RSS.
    """

    __tablename__ = 'alert_items'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='CASCADE'), nullable=False, index=True)

    # Content
    title = Column(Text, nullable=False)
    url = Column(String(2000), nullable=False)
    snippet = Column(Text, default="")
    article_text = Column(Text, nullable=True)  # Full text (optional)

    # Metadata
    lang = Column(String(10), default="de")
    published_at = Column(DateTime, nullable=True, index=True)
    fetched_at = Column(DateTime, default=datetime.utcnow, index=True)
    processed_at = Column(DateTime, nullable=True)

    # Source
    source = Column(
        SQLEnum(AlertSourceEnum),
        default=AlertSourceEnum.GOOGLE_ALERTS_RSS,
        nullable=False
    )

    # Deduplication
    url_hash = Column(String(64), unique=True, nullable=False, index=True)
    content_hash = Column(String(64), nullable=True)  # SimHash for fuzzy matching
    canonical_url = Column(String(2000), nullable=True)

    # Status
    status = Column(
        SQLEnum(AlertStatusEnum),
        default=AlertStatusEnum.NEW,
        nullable=False,
        index=True
    )
    cluster_id = Column(String(36), nullable=True)  # Groups similar alerts

    # Relevance scoring
    relevance_score = Column(Float, nullable=True)
    relevance_decision = Column(
        SQLEnum(RelevanceDecisionEnum),
        nullable=True,
        index=True
    )
    relevance_reasons = Column(JSON, default=list)  # ["matches_priority", ...]
    relevance_summary = Column(Text, nullable=True)
    scored_by_model = Column(String(100), nullable=True)  # "llama3.1:8b"
    scored_at = Column(DateTime, nullable=True)

    # User actions
    user_marked_relevant = Column(Boolean, nullable=True)  # Explicit feedback
    user_tags = Column(JSON, default=list)  # ["wichtig", "später lesen"]
    user_notes = Column(Text, nullable=True)

    # Guided mode fields (new)
    importance_level = Column(
        SQLEnum(ImportanceLevelEnum),
        nullable=True,
        index=True
    )
    why_relevant = Column(Text, nullable=True)  # "Why relevant?" explanation
    next_steps = Column(JSON, default=list)  # ["Schulleitung informieren", "Frist beachten"]
    action_deadline = Column(DateTime, nullable=True)  # Set if there is a deadline
    source_name = Column(String(255), nullable=True)  # "Kultusministerium NRW"
    source_credibility = Column(String(50), default="official")  # official, news, blog

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationship
    topic = relationship("AlertTopicDB", back_populates="alerts")

    # Composite indexes for frequent queries
    __table_args__ = (
        Index('ix_alert_items_topic_status', 'topic_id', 'status'),
        Index('ix_alert_items_topic_decision', 'topic_id', 'relevance_decision'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation (id, status, truncated title)."""
        # Titles are unbounded Text; cap them so log lines stay readable.
        short_title = (self.title or "")[:50]
        return (
            f"<AlertItemDB id={self.id!r} status={self.status!r} "
            f"title={short_title!r}>"
        )


class AlertRuleDB(Base):
    """
    Filter rule for alerts.

    Defines conditions and actions for automatic processing.
    """

    __tablename__ = 'alert_rules'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='CASCADE'), nullable=True, index=True)
    user_id = Column(String(36), nullable=True, index=True)

    # Rule details
    name = Column(String(255), nullable=False)
    description = Column(Text, default="")

    # Conditions (as JSON)
    # Format: [{"field": "title", "op": "contains", "value": "..."}]
    conditions = Column(JSON, nullable=False, default=list)

    # Action
    action_type = Column(
        SQLEnum(RuleActionEnum),
        default=RuleActionEnum.KEEP,
        nullable=False
    )
    action_config = Column(JSON, default=dict)  # {"email": "x@y.z", "tags": [...]}

    # Prioritization (higher = executed first)
    priority = Column(Integer, default=0, index=True)
    is_active = Column(Boolean, default=True, index=True)

    # Statistics
    match_count = Column(Integer, default=0)
    last_matched_at = Column(DateTime, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationship
    topic = relationship("AlertTopicDB", back_populates="rules")

    def __repr__(self) -> str:
        """Return a short debug representation (id, name, action type)."""
        return (
            f"<AlertRuleDB id={self.id!r} name={self.name!r} "
            f"action_type={self.action_type!r}>"
        )


class AlertProfileDB(Base):
    """
    User profile for relevance scoring.

    Stores priorities, exclusions and learning examples.
    """

    __tablename__ = 'alert_profiles'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), unique=True, nullable=True, index=True)

    # Display name (in case of multiple profiles per user)
    # NOTE(review): user_id is declared unique above, which conflicts with
    # "multiple profiles per user" for non-NULL user_ids - confirm intent.
    name = Column(String(255), default="Default")

    # Relevance criteria
    # Format: [{"label": "Inklusion", "weight": 0.9, "keywords": [...], "description": "..."}]
    priorities = Column(JSON, default=list)

    # Exclusion keywords
    exclusions = Column(JSON, default=list)  # ["Stellenanzeige", "Werbung"]

    # Few-shot examples for the LLM
    # Format: [{"title": "...", "url": "...", "reason": "...", "added_at": "..."}]
    positive_examples = Column(JSON, default=list)
    negative_examples = Column(JSON, default=list)

    # Policies
    # Format: {"prefer_german_sources": true, "max_age_days": 30}
    policies = Column(JSON, default=dict)

    # Statistics
    total_scored = Column(Integer, default=0)
    total_kept = Column(Integer, default=0)
    total_dropped = Column(Integer, default=0)
    accuracy_estimate = Column(Float, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def __repr__(self) -> str:
        """Return a short debug representation (id, user id, name)."""
        return (
            f"<AlertProfileDB id={self.id!r} user_id={self.user_id!r} "
            f"name={self.name!r}>"
        )

    @staticmethod
    def _example_lines(heading, examples):
        """Render the last five few-shot examples under *heading*.

        Returns an empty list when *examples* is empty or None, otherwise the
        heading, one bullet per example (plus its reason, if any) and a
        trailing blank line.
        """
        if not examples:
            return []
        rendered = [heading]
        for ex in examples[-5:]:
            rendered.append(f"- \"{ex.get('title', '')}\"")
            if ex.get("reason"):
                rendered.append(f" Grund: {ex['reason']}")
        rendered.append("")
        return rendered

    def get_prompt_context(self) -> str:
        """
        Generate the profile context for the LLM prompt.

        Builds a Markdown-style text from priorities, exclusions, the last
        five positive/negative examples and the policies. Empty sections are
        omitted.

        Returns:
            The sections joined with newlines.
        """
        lines = ["## Relevanzprofil des Nutzers\n"]

        # Priorities
        if self.priorities:
            lines.append("### Prioritäten (Themen von Interesse):")
            for p in self.priorities:
                weight = p.get("weight")
                if weight is None:
                    # Missing key or JSON null: fall back to the default
                    # instead of failing on the comparison below.
                    weight = 0.5
                if weight > 0.7:
                    weight_label = "Sehr wichtig"
                elif weight > 0.4:
                    weight_label = "Wichtig"
                else:
                    weight_label = "Interessant"
                lines.append(f"- **{p.get('label', 'Unbenannt')}** ({weight_label})")
                if p.get("description"):
                    lines.append(f" {p['description']}")
                if p.get("keywords"):
                    # str() guards against non-string JSON values in the list.
                    keywords = ', '.join(str(k) for k in p['keywords'])
                    lines.append(f" Keywords: {keywords}")
            lines.append("")

        # Exclusions
        if self.exclusions:
            excluded = ', '.join(str(term) for term in self.exclusions)
            lines.append("### Ausschlüsse (ignorieren):")
            lines.append(f"Themen mit diesen Keywords: {excluded}")
            lines.append("")

        # Positive / negative examples (last 5 each)
        lines.extend(self._example_lines(
            "### Beispiele für relevante Alerts:", self.positive_examples
        ))
        lines.extend(self._example_lines(
            "### Beispiele für irrelevante Alerts:", self.negative_examples
        ))

        # Policies
        if self.policies:
            lines.append("### Zusätzliche Regeln:")
            for key, value in self.policies.items():
                lines.append(f"- {key}: {value}")

        return "\n".join(lines)

    @classmethod
    def create_default_education_profile(cls) -> "AlertProfileDB":
        """
        Create a default profile for education topics.

        Returns:
            A new, unsaved instance with preset priorities, exclusions and
            policies.
        """
        return cls(
            name="Bildung Default",
            priorities=[
                {
                    "label": "Inklusion",
                    "weight": 0.9,
                    "keywords": ["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
                    "description": "Inklusive Bildung, Förderschulen, Nachteilsausgleich"
                },
                {
                    "label": "Datenschutz Schule",
                    "weight": 0.85,
                    "keywords": ["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
                    "description": "DSGVO in Schulen, Datenschutz bei Klassenfotos"
                },
                {
                    "label": "Schulrecht Bayern",
                    "weight": 0.8,
                    "keywords": ["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
                    "description": "Bayerisches Schulrecht, Verordnungen"
                },
                {
                    "label": "Digitalisierung Schule",
                    "weight": 0.7,
                    "keywords": ["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
                    "description": "Digitale Medien im Unterricht"
                },
            ],
            exclusions=["Stellenanzeige", "Praktikum gesucht", "Werbung", "Pressemitteilung"],
            policies={
                "prefer_german_sources": True,
                "max_age_days": 30,
                "min_content_length": 100,
            }
        )


# ============================================================================
# DUAL-MODE SYSTEM: Templates, Subscriptions, Sources, Digests
# ============================================================================

class AlertTemplateDB(Base):
    """
    Preconfigured alert templates (playbooks).

    For guided mode: teachers pick 1-3 templates instead of configuring
    RSS feeds.
    """

    __tablename__ = 'alert_templates'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Template identity
    slug = Column(String(100), unique=True, nullable=False)  # "foerderprogramme", "abitur-updates"
    name = Column(String(255), nullable=False)  # "Förderprogramme & Fristen"
    description = Column(Text, default="")  # B1/B2-level German, 1-2 sentences
    icon = Column(String(50), default="")  # Emoji: "💰", "📝", "⚖️"
    category = Column(String(100), default="")  # "administration", "teaching", "it"

    # Target groups (which roles benefit)
    target_roles = Column(JSON, default=list)  # ["schulleitung", "lehrkraft"]

    # Template configuration
    topics_config = Column(JSON, default=list)  # Preconfigured RSS feeds
    rules_config = Column(JSON, default=list)  # Preconfigured rules
    profile_config = Column(JSON, default=dict)  # Priorities / exclusions

    # Importance mapping (score -> 5 levels)
    importance_config = Column(JSON, default=dict)  # {"critical": 0.90, "urgent": 0.75, ...}

    # Output settings
    max_cards_per_day = Column(Integer, default=10)
    digest_enabled = Column(Boolean, default=True)
    digest_day = Column(String(20), default="monday")  # Day of the weekly digest

    # Localization
    language = Column(String(10), default="de")

    # Metadata
    is_active = Column(Boolean, default=True)
    is_premium = Column(Boolean, default=False)  # For paid templates
    sort_order = Column(Integer, default=0)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    subscriptions = relationship("UserAlertSubscriptionDB", back_populates="template")

    def __repr__(self) -> str:
        """Return a short debug representation (id, slug, name)."""
        return (
            f"<AlertTemplateDB id={self.id!r} slug={self.slug!r} "
            f"name={self.name!r}>"
        )


class AlertSourceDB(Base):
    """
    Alert source for migrating existing alerts.

    Supports: e-mail forwarding, RSS import, reconstruction.
    """

    __tablename__ = 'alert_sources'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), nullable=True, index=True)  # For multi-tenant
    user_id = Column(String(36), nullable=True, index=True)

    # Source type
    source_type = Column(
        SQLEnum(FeedTypeEnum),
        default=FeedTypeEnum.RSS,
        nullable=False
    )

    # Original label (from the customer)
    original_label = Column(String(255), nullable=True)  # "EU IT Ausschreibungen"

    # E-mail forwarding
    inbound_address = Column(String(255), nullable=True, unique=True)  # alerts+tenant123@breakpilot.app

    # RSS import
    rss_url = Column(String(2000), nullable=True)

    # Migration mode
    migration_mode = Column(
        SQLEnum(MigrationModeEnum),
        default=MigrationModeEnum.IMPORT,
        nullable=False
    )

    # Link to the created topic
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='SET NULL'), nullable=True)

    # Status
    is_active = Column(Boolean, default=True)
    items_received = Column(Integer, default=0)
    last_item_at = Column(DateTime, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def __repr__(self) -> str:
        """Return a short debug representation (id, source type, label)."""
        return (
            f"<AlertSourceDB id={self.id!r} source_type={self.source_type!r} "
            f"original_label={self.original_label!r}>"
        )


class UserAlertSubscriptionDB(Base):
    """
    User subscription for alert templates or expert profiles.

    Stores the mode choice, template link and wizard state.
    """

    __tablename__ = 'user_alert_subscriptions'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), nullable=False, index=True)
    school_id = Column(String(36), nullable=True, index=True)  # Optional: school context

    # Mode selection
    mode = Column(
        SQLEnum(AlertModeEnum),
        default=AlertModeEnum.GUIDED,
        nullable=False
    )

    # User role (for guided mode)
    user_role = Column(
        SQLEnum(UserRoleEnum),
        nullable=True
    )

    # Template link (guided mode) - can be several
    template_id = Column(String(36), ForeignKey('alert_templates.id', ondelete='SET NULL'), nullable=True)
    selected_template_ids = Column(JSON, default=list)  # Up to 3 templates

    # Profile link (expert mode)
    profile_id = Column(String(36), ForeignKey('alert_profiles.id', ondelete='SET NULL'), nullable=True)

    # Subscription settings
    is_active = Column(Boolean, default=True)
    notification_email = Column(String(255), nullable=True)

    # Digest preferences
    digest_enabled = Column(Boolean, default=True)
    digest_frequency = Column(String(20), default="weekly")  # weekly, daily
    digest_day = Column(String(20), default="monday")
    last_digest_sent_at = Column(DateTime, nullable=True)

    # Wizard state (for incomplete setups)
    wizard_step = Column(Integer, default=0)
    wizard_completed = Column(Boolean, default=False)
    wizard_state = Column(JSON, default=dict)  # Scratch storage for wizard data

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    template = relationship("AlertTemplateDB", back_populates="subscriptions")
    profile = relationship("AlertProfileDB")
    digests = relationship("AlertDigestDB", back_populates="subscription", cascade="all, delete-orphan")

    def __repr__(self) -> str:
        """Return a short debug representation (id, user id, mode)."""
        return (
            f"<UserAlertSubscriptionDB id={self.id!r} user_id={self.user_id!r} "
            f"mode={self.mode!r}>"
        )


class AlertDigestDB(Base):
    """
    Weekly digest summary.

    Contains the rendered summary plus statistics.
    """

    __tablename__ = 'alert_digests'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    subscription_id = Column(String(36), ForeignKey('user_alert_subscriptions.id', ondelete='CASCADE'), nullable=False, index=True)
    user_id = Column(String(36), nullable=False, index=True)

    # Period
    period_start = Column(DateTime, nullable=False)
    period_end = Column(DateTime, nullable=False)

    # Content
    title = Column(String(255), default="")  # "KW 3/2026 - Ihre Bildungs-Alerts"
    summary_html = Column(Text, default="")  # Rendered HTML summary
    summary_pdf_url = Column(String(500), nullable=True)  # Link to the PDF export

    # Statistics
    total_alerts = Column(Integer, default=0)
    kritisch_count = Column(Integer, default=0)
    dringend_count = Column(Integer, default=0)
    wichtig_count = Column(Integer, default=0)
    pruefen_count = Column(Integer, default=0)
    info_count = Column(Integer, default=0)

    # Included alert IDs
    alert_ids = Column(JSON, default=list)

    # Status
    status = Column(
        SQLEnum(DigestStatusEnum),
        default=DigestStatusEnum.PENDING,
        nullable=False
    )
    sent_at = Column(DateTime, nullable=True)
    error_message = Column(Text, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationships
    subscription = relationship("UserAlertSubscriptionDB", back_populates="digests")

    def __repr__(self) -> str:
        """Return a short debug representation (id, subscription id, status)."""
        return (
            f"<AlertDigestDB id={self.id!r} "
            f"subscription_id={self.subscription_id!r} status={self.status!r}>"
        )