breakpilot-pwa/scripts/qwen_refactor_orchestrator.py

#!/usr/bin/env python3
"""
Qwen Refactoring Orchestrator
=============================
Orchestriert Code-Refactoring via Qwen2.5:32B auf dem Mac Mini.

Workflow:
1. Liest große Dateien in Chunks
2. Sendet Refactoring-Prompts an Qwen via Ollama API
3. Validiert und integriert Ergebnisse
4. Führt Tests aus

Usage:
    python qwen_refactor_orchestrator.py --file backend/frontend/static/js/studio.js
    python qwen_refactor_orchestrator.py --all-large-files
    python qwen_refactor_orchestrator.py --status
"""

import argparse
import asyncio
import json
import os
import re
import subprocess
import sys
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any
import httpx

# Configuration
MAC_MINI_HOST = "mac-mini-von-benjamin.fritz.box"
OLLAMA_PORT = 11434
OLLAMA_URL = f"http://{MAC_MINI_HOST}:{OLLAMA_PORT}"
MODEL_NAME = "qwen2.5:32b"
# NOTE(review): the token estimate below is the original author's; 800 lines
# of source code are likely to be considerably more than 1500 tokens - verify.
MAX_CHUNK_LINES = 800  # ~1500 tokens per chunk
# NOTE(review): not referenced anywhere in the code shown in this file.
MAX_CONTEXT_TOKENS = 28000  # Safety margin for the 32K context

# The project root is the parent of the directory containing this script.
PROJECT_ROOT = Path(__file__).parent.parent
REFACTOR_OUTPUT_DIR = PROJECT_ROOT / "refactored"
REFACTOR_LOG_FILE = PROJECT_ROOT / "refactor_log.json"

# Top 10 large files, as (path relative to PROJECT_ROOT, line count, language label)
LARGE_FILES = [
    ("backend/frontend/static/js/studio.js", 9787, "JavaScript"),
    ("website/components/admin/SystemInfoSection.tsx", 5690, "TypeScript/React"),
    ("backend/frontend/modules/companion.py", 5513, "Python"),
    ("backend/classroom_api.py", 4467, "Python"),
    ("backend/frontend/school.py", 3732, "Python"),
    ("backend/frontend/components/admin_panel.py", 3434, "Python"),
    ("backend/ai_processor.py", 2999, "Python"),
    ("website/app/admin/rag/page.tsx", 2964, "TypeScript/React"),
    ("backend/frontend/modules/alerts.py", 2902, "Python"),
    ("backend/frontend/meetings.py", 2847, "Python"),
]


@dataclass
class RefactorChunk:
    """A contiguous slice of a source file that is refactored as one unit."""

    # Path of the file this chunk was cut from (relative to the project root).
    file_path: str
    # Zero-based position of this chunk within the file.
    chunk_index: int
    # Number of chunks the file was split into.
    total_chunks: int
    # First and last line covered by the chunk (1-based, inclusive).
    start_line: int
    end_line: int
    # Original source text of the chunk.
    content: str
    # Language label used to pick split points and to word the prompt.
    language: str
    # Model output for this chunk; None until a refactoring succeeded.
    refactored_content: Optional[str] = None
    # One of: pending, processing, completed, failed.
    status: str = "pending"
    # Failure description when status is "failed".
    error: Optional[str] = None


@dataclass
class RefactorSession:
    """Bookkeeping for the refactoring of one file."""

    # Path of the file being refactored (relative to the project root).
    file_path: str
    # Language label of the file.
    language: str
    # Number of lines in the original file.
    original_lines: int
    # Chunks the file was split into; every session gets its own list.
    chunks: List[RefactorChunk] = field(default_factory=list)
    # When the refactoring run started / finished (None if not yet).
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    # Overall state; the code in this file uses pending, processing,
    # completed and partial.
    status: str = "pending"
    # Outcome of a test run; None while no result has been recorded.
    tests_passed: Optional[bool] = None


class QwenRefactorOrchestrator:
    """Orchestriert Refactoring via Qwen auf Mac Mini"""

    def __init__(self):
        """Start with an empty session registry, then restore persisted state."""
        # Sessions keyed by the path of the file they refactor.
        self.sessions: Dict[str, RefactorSession] = {}
        self.load_state()

    def load_state(self):
        """Lädt den Zustand aus der Log-Datei"""
        if REFACTOR_LOG_FILE.exists():
            try:
                with open(REFACTOR_LOG_FILE) as f:
                    data = json.load(f)
                    # Rekonstruiere Sessions aus JSON
                    for path, session_data in data.get("sessions", {}).items():
                        self.sessions[path] = RefactorSession(**session_data)
            except Exception as e:
                print(f"Warning: Could not load state: {e}")

    def save_state(self):
        """Speichert den Zustand in die Log-Datei"""
        data = {
            "sessions": {
                path: {
                    "file_path": s.file_path,
                    "language": s.language,
                    "original_lines": s.original_lines,
                    "status": s.status,
                    "started_at": s.started_at.isoformat() if s.started_at else None,
                    "completed_at": s.completed_at.isoformat() if s.completed_at else None,
                    "tests_passed": s.tests_passed,
                    "chunks": [
                        {
                            "chunk_index": c.chunk_index,
                            "total_chunks": c.total_chunks,
                            "start_line": c.start_line,
                            "end_line": c.end_line,
                            "status": c.status,
                            "error": c.error,
                        }
                        for c in s.chunks
                    ],
                }
                for path, s in self.sessions.items()
            },
            "last_updated": datetime.now().isoformat(),
        }
        with open(REFACTOR_LOG_FILE, "w") as f:
            json.dump(data, f, indent=2)

    async def check_ollama_status(self) -> Dict[str, Any]:
        """Prüft ob Ollama auf dem Mac Mini erreichbar ist"""
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(f"{OLLAMA_URL}/api/tags")
                if response.status_code == 200:
                    models = response.json().get("models", [])
                    qwen_available = any(m.get("name", "").startswith("qwen2.5:32b") for m in models)
                    return {
                        "status": "online",
                        "models": [m.get("name") for m in models],
                        "qwen_available": qwen_available,
                    }
        except Exception as e:
            return {"status": "offline", "error": str(e)}
        return {"status": "unknown"}

    def split_file_into_chunks(self, file_path: str, language: str) -> List[RefactorChunk]:
        """Teilt eine Datei in logische Chunks auf"""
        full_path = PROJECT_ROOT / file_path
        if not full_path.exists():
            raise FileNotFoundError(f"File not found: {full_path}")

        with open(full_path) as f:
            lines = f.readlines()

        chunks = []
        current_chunk_start = 0
        current_chunk_lines = []

        # Finde logische Trennstellen basierend auf Sprache
        if language == "Python":
            # Trenne bei Klassen und Top-Level-Funktionen
            split_pattern = re.compile(r"^(class |def |async def )")
        elif language in ["JavaScript", "TypeScript/React"]:
            # Trenne bei Funktionen, Klassen, und export statements
            split_pattern = re.compile(r"^(export |function |class |const \w+ = )")
        else:
            split_pattern = None

        for i, line in enumerate(lines):
            current_chunk_lines.append(line)

            # Prüfe ob wir splitten sollten
            should_split = False
            if len(current_chunk_lines) >= MAX_CHUNK_LINES:
                should_split = True
            elif split_pattern and i > current_chunk_start + 100:
                # Versuche an logischer Stelle zu trennen
                if split_pattern.match(line) and len(current_chunk_lines) > 200:
                    should_split = True

            if should_split or i == len(lines) - 1:
                chunk = RefactorChunk(
                    file_path=file_path,
                    chunk_index=len(chunks),
                    total_chunks=0,  # Wird später gesetzt
                    start_line=current_chunk_start + 1,
                    end_line=i + 1,
                    content="".join(current_chunk_lines),
                    language=language,
                )
                chunks.append(chunk)
                current_chunk_start = i + 1
                current_chunk_lines = []

        # Setze total_chunks
        for chunk in chunks:
            chunk.total_chunks = len(chunks)

        return chunks

    def create_refactoring_prompt(self, chunk: RefactorChunk, session: RefactorSession) -> str:
        """Erstellt den Refactoring-Prompt für Qwen"""
        return f"""Du bist ein erfahrener Software-Entwickler. Refaktoriere den folgenden {chunk.language}-Code.

ZIELE:
1. Teile große Funktionen in kleinere, wiederverwendbare Einheiten
2. Verbessere die Lesbarkeit durch bessere Variablennamen
3. Entferne doppelten Code (DRY-Prinzip)
4. Füge hilfreiche Kommentare hinzu (aber nicht übertreiben)
5. Behalte die gesamte Funktionalität bei!

REGELN:
- Keine neuen Dependencies hinzufügen
- Alle existierenden Exports/APIs beibehalten
- Keine Breaking Changes
- Code muss weiterhin funktionieren

DATEI: {chunk.file_path}
CHUNK: {chunk.chunk_index + 1}/{chunk.total_chunks} (Zeilen {chunk.start_line}-{chunk.end_line})
SPRACHE: {chunk.language}
ORIGINALZEILEN: {session.original_lines}

--- ORIGINAL CODE ---
{chunk.content}
--- END ORIGINAL CODE ---

Gib NUR den refaktorierten Code zurück, ohne Erklärungen oder Markdown-Blöcke.
"""

    async def refactor_chunk(self, chunk: RefactorChunk, session: RefactorSession) -> bool:
        """Refaktoriert einen einzelnen Chunk via Qwen"""
        chunk.status = "processing"
        self.save_state()

        prompt = self.create_refactoring_prompt(chunk, session)

        try:
            async with httpx.AsyncClient(timeout=300.0) as client:
                response = await client.post(
                    f"{OLLAMA_URL}/api/generate",
                    json={
                        "model": MODEL_NAME,
                        "prompt": prompt,
                        "stream": False,
                        "options": {
                            "num_predict": 4096,
                            "temperature": 0.3,
                        },
                    },
                )

                if response.status_code == 200:
                    result = response.json()
                    refactored = result.get("response", "")

                    # Entferne eventuelle Markdown-Code-Blöcke
                    refactored = re.sub(r"^```\w*\n", "", refactored)
                    refactored = re.sub(r"\n```$", "", refactored)

                    chunk.refactored_content = refactored
                    chunk.status = "completed"
                    self.save_state()
                    return True
                else:
                    chunk.status = "failed"
                    chunk.error = f"HTTP {response.status_code}: {response.text}"
                    self.save_state()
                    return False

        except Exception as e:
            chunk.status = "failed"
            chunk.error = str(e)
            self.save_state()
            return False

    async def refactor_file(self, file_path: str, language: str) -> RefactorSession:
        """Refaktoriert eine komplette Datei"""
        print(f"\n{'=' * 60}")
        print(f"Starte Refactoring: {file_path}")
        print(f"{'=' * 60}")

        # Erstelle Session
        full_path = PROJECT_ROOT / file_path
        with open(full_path) as f:
            original_lines = len(f.readlines())

        chunks = self.split_file_into_chunks(file_path, language)
        session = RefactorSession(
            file_path=file_path,
            language=language,
            original_lines=original_lines,
            chunks=chunks,
            started_at=datetime.now(),
            status="processing",
        )
        self.sessions[file_path] = session
        self.save_state()

        print(f"Datei aufgeteilt in {len(chunks)} Chunks")

        # Refaktoriere jeden Chunk
        for i, chunk in enumerate(chunks):
            print(f"\nChunk {i + 1}/{len(chunks)} (Zeilen {chunk.start_line}-{chunk.end_line})...")
            success = await self.refactor_chunk(chunk, session)
            if success:
                print(f"  ✓ Chunk {i + 1} refaktoriert")
            else:
                print(f"  ✗ Chunk {i + 1} fehlgeschlagen: {chunk.error}")

        # Prüfe ob alle Chunks erfolgreich waren
        all_success = all(c.status == "completed" for c in chunks)

        if all_success:
            # Kombiniere refaktorierten Code
            refactored_content = "\n".join(c.refactored_content for c in chunks if c.refactored_content)

            # Speichere refaktorierten Code
            output_dir = REFACTOR_OUTPUT_DIR / Path(file_path).parent
            output_dir.mkdir(parents=True, exist_ok=True)
            output_file = REFACTOR_OUTPUT_DIR / file_path

            with open(output_file, "w") as f:
                f.write(refactored_content)

            session.status = "completed"
            session.completed_at = datetime.now()
            print(f"\n✓ Refactoring abgeschlossen: {output_file}")
        else:
            session.status = "partial"
            failed_chunks = [c for c in chunks if c.status == "failed"]
            print(f"\n⚠ Refactoring teilweise abgeschlossen. {len(failed_chunks)} Chunks fehlgeschlagen.")

        self.save_state()
        return session

    async def run_tests(self, file_path: str) -> bool:
        """Führt Tests für die refaktorierte Datei aus"""
        print(f"\nFühre Tests aus für {file_path}...")

        # Bestimme Test-Kommando basierend auf Dateityp
        if file_path.endswith(".py"):
            # Python Tests
            test_cmd = ["python", "-m", "pytest", "-v", "--tb=short"]
            if "backend" in file_path:
                test_cmd.append("backend/tests/")
        elif file_path.endswith((".ts", ".tsx", ".js")):
            # TypeScript/JavaScript Tests
            if "website" in file_path:
                test_cmd = ["npm", "test", "--", "--passWithNoTests"]
        else:
            print("  Keine Tests für diesen Dateityp konfiguriert")
            return True

        try:
            result = subprocess.run(
                test_cmd,
                cwd=PROJECT_ROOT,
                capture_output=True,
                text=True,
                timeout=300,
            )
            if result.returncode == 0:
                print("  ✓ Tests bestanden")
                return True
            else:
                print(f"  ✗ Tests fehlgeschlagen:\n{result.stdout}\n{result.stderr}")
                return False
        except Exception as e:
            print(f"  ✗ Test-Ausführung fehlgeschlagen: {e}")
            return False

    def print_status(self):
        """Zeigt den aktuellen Status aller Sessions"""
        print("\n" + "=" * 70)
        print("QWEN REFACTORING STATUS")
        print("=" * 70)

        if not self.sessions:
            print("Keine aktiven Sessions")
            return

        for path, session in self.sessions.items():
            completed = sum(1 for c in session.chunks if c.status == "completed")
            failed = sum(1 for c in session.chunks if c.status == "failed")
            pending = sum(1 for c in session.chunks if c.status == "pending")

            status_icon = {
                "pending": "○",
                "processing": "◐",
                "completed": "●",
                "partial": "◑",
            }.get(session.status, "?")

            print(f"\n{status_icon} {path}")
            print(f"  Status: {session.status}")
            print(f"  Chunks: {completed}/{len(session.chunks)} completed, {failed} failed, {pending} pending")
            if session.started_at:
                print(f"  Gestartet: {session.started_at.strftime('%Y-%m-%d %H:%M')}")
            if session.completed_at:
                print(f"  Abgeschlossen: {session.completed_at.strftime('%Y-%m-%d %H:%M')}")
            if session.tests_passed is not None:
                print(f"  Tests: {'✓ Bestanden' if session.tests_passed else '✗ Fehlgeschlagen'}")


async def main():
    """Parse the command line and run the requested orchestrator action.

    Options are checked in a fixed order and the first one present wins:
    ``--status``, ``--check-ollama``, ``--run-tests``, ``--file``,
    ``--all-large-files``. Without any option the status is printed.
    ``--run-tests`` ends the process with exit code 0 or 1.
    """
    parser = argparse.ArgumentParser(description="Qwen Refactoring Orchestrator")
    parser.add_argument("--file", help="Einzelne Datei refaktorieren")
    parser.add_argument("--all-large-files", action="store_true", help="Alle großen Dateien refaktorieren")
    parser.add_argument("--status", action="store_true", help="Status anzeigen")
    parser.add_argument("--check-ollama", action="store_true", help="Ollama-Status prüfen")
    parser.add_argument("--run-tests", help="Tests für refaktorierte Datei ausführen")
    args = parser.parse_args()

    orchestrator = QwenRefactorOrchestrator()

    if args.status:
        orchestrator.print_status()
        return

    if args.check_ollama:
        print("Prüfe Ollama-Status auf Mac Mini...")
        status = await orchestrator.check_ollama_status()
        print(f"Status: {status['status']}")

        model_names = status.get("models")
        if model_names:
            print(f"Verfügbare Modelle: {', '.join(model_names)}")

        availability = (
            "✓ Qwen2.5:32B ist verfügbar"
            if status.get("qwen_available")
            else "✗ Qwen2.5:32B ist NICHT verfügbar"
        )
        print(availability)

        if status.get("error"):
            print(f"Fehler: {status['error']}")
        return

    if args.run_tests:
        passed = await orchestrator.run_tests(args.run_tests)
        sys.exit(0 if passed else 1)

    if args.file:
        target = args.file

        # Prefer the language recorded for a known large file ...
        language = next(
            (lang for known_path, _, lang in LARGE_FILES if known_path == target),
            "Unknown",
        )
        # ... and fall back to the file extension otherwise.
        if language == "Unknown":
            suffix_languages = (
                ((".py",), "Python"),
                ((".ts", ".tsx"), "TypeScript/React"),
                ((".js",), "JavaScript"),
            )
            for suffixes, label in suffix_languages:
                if target.endswith(suffixes):
                    language = label
                    break

        await orchestrator.refactor_file(target, language)
        return

    if args.all_large_files:
        print("Refaktoriere alle großen Dateien...")
        for file_path, _line_count, language in LARGE_FILES:
            # One failing file must not stop the remaining ones.
            try:
                await orchestrator.refactor_file(file_path, language)
            except Exception as e:
                print(f"Fehler bei {file_path}: {e}")
        return

    # Default: show the status
    orchestrator.print_status()


# Run the async command-line entry point only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())