This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

744 lines
26 KiB
Python

"""
Local LLM Component - Transformers.js + ONNX Integration.
Ermoeglicht Header-Extraktion aus Klausuren direkt im Browser:
- Laeuft vollstaendig lokal (Privacy-by-Design)
- ONNX Modell wird beim PWA-Install gecacht
- Extrahiert: Namen, Klasse, Fach, Datum
Architektur:
1. Service Worker cacht ONNX Modell (~100MB)
2. Transformers.js laedt Modell aus Cache
3. Header-Region wird per Canvas extrahiert
4. Vision-Modell extrahiert strukturierte Daten
"""
class LocalLLMComponent:
    """Bundle of browser assets for exam-header extraction inside the PWA.

    The class is a pure namespace: it holds three model-related constants
    and three static methods that each return one static text asset
    (CSS, HTML, JavaScript) to be embedded into a page by the caller.

    The JavaScript asset publishes ``window.LocalLLM`` with the entries
    ``init``, ``extractHeaders``, ``magicAnalyze`` and ``getStatus``.
    """

    # Model configuration.
    # NOTE(review): the JavaScript returned by get_js() hard-codes its own
    # ``modelId`` and ``cacheName``; only the cache name currently matches
    # MODEL_CACHE_NAME. Keep both places in sync when changing either.
    MODEL_ID = "breakpilot/exam-header-extractor"
    MODEL_CACHE_NAME = "breakpilot-llm-v1"
    MODEL_SIZE_MB = 100

    @staticmethod
    def get_css() -> str:
        """Return the stylesheet for the local-LLM UI elements.

        Covers the status indicator (``.local-llm-status`` with the state
        classes ``loading``/``ready``/``error``), the extraction progress
        bar and the "instant feedback" card. Colours are taken from
        ``--bp-*`` CSS custom properties that the embedding page must define.
        """
        return """
/* Local LLM Status Indicator */
.local-llm-status {
    display: flex;
    align-items: center;
    gap: 8px;
    padding: 8px 12px;
    background: var(--bp-surface-elevated);
    border-radius: 8px;
    font-size: 13px;
}
.local-llm-status.loading {
    background: var(--bp-warning-bg);
}
.local-llm-status.ready {
    background: var(--bp-success-bg);
}
.local-llm-status.error {
    background: var(--bp-error-bg);
}
.llm-status-dot {
    width: 8px;
    height: 8px;
    border-radius: 50%;
    background: var(--bp-text-muted);
}
.local-llm-status.loading .llm-status-dot {
    background: var(--bp-warning);
    animation: pulse 1.5s infinite;
}
.local-llm-status.ready .llm-status-dot {
    background: var(--bp-success);
}
.local-llm-status.error .llm-status-dot {
    background: var(--bp-error);
}
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.4; }
}
/* Extraction Progress */
.extraction-progress {
    display: flex;
    flex-direction: column;
    gap: 12px;
    padding: 16px;
    background: var(--bp-surface);
    border-radius: 12px;
    border: 1px solid var(--bp-border);
}
.extraction-progress-bar {
    height: 6px;
    background: var(--bp-border);
    border-radius: 3px;
    overflow: hidden;
}
.extraction-progress-fill {
    height: 100%;
    background: linear-gradient(90deg, var(--bp-primary), var(--bp-accent));
    border-radius: 3px;
    transition: width 0.3s ease;
}
.extraction-stats {
    display: flex;
    justify-content: space-between;
    font-size: 13px;
    color: var(--bp-text-muted);
}
/* Instant Feedback Card */
.instant-feedback-card {
    background: linear-gradient(135deg, var(--bp-primary-bg), var(--bp-accent-bg));
    border-radius: 16px;
    padding: 24px;
    text-align: center;
    animation: fadeInUp 0.5s ease;
}
@keyframes fadeInUp {
    from {
        opacity: 0;
        transform: translateY(20px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}
.instant-feedback-card h3 {
    margin: 0 0 16px 0;
    font-size: 20px;
    color: var(--bp-text);
}
.detected-info-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
    gap: 16px;
    margin-top: 16px;
}
.detected-info-item {
    background: var(--bp-surface);
    border-radius: 12px;
    padding: 16px;
    text-align: center;
}
.detected-info-item .label {
    font-size: 12px;
    color: var(--bp-text-muted);
    margin-bottom: 4px;
}
.detected-info-item .value {
    font-size: 18px;
    font-weight: 600;
    color: var(--bp-text);
}
.detected-info-item .confidence {
    font-size: 11px;
    color: var(--bp-success);
    margin-top: 4px;
}
"""

    @staticmethod
    def get_html() -> str:
        """Return the HTML for the local-LLM UI elements (embedded in the wizard).

        The fragment contains three initially hidden containers whose ids the
        script from :meth:`get_js` looks up: ``local-llm-status``,
        ``instant-feedback`` and ``extraction-progress``.
        """
        return """
<!-- Local LLM Status (oben im Wizard) -->
<div id="local-llm-status" class="local-llm-status" style="display: none;">
    <span class="llm-status-dot"></span>
    <span class="llm-status-text">KI-Modell wird geladen...</span>
</div>
<!-- Instant Feedback (nach Upload) -->
<div id="instant-feedback" class="instant-feedback-card" style="display: none;">
    <h3>Automatisch erkannt</h3>
    <div class="detected-info-grid">
        <div class="detected-info-item">
            <div class="label">Klasse</div>
            <div class="value" id="detected-class">-</div>
            <div class="confidence" id="detected-class-conf"></div>
        </div>
        <div class="detected-info-item">
            <div class="label">Fach</div>
            <div class="value" id="detected-subject">-</div>
            <div class="confidence" id="detected-subject-conf"></div>
        </div>
        <div class="detected-info-item">
            <div class="label">Schueler</div>
            <div class="value" id="detected-count">-</div>
        </div>
        <div class="detected-info-item">
            <div class="label">Datum</div>
            <div class="value" id="detected-date">-</div>
        </div>
    </div>
</div>
<!-- Extraction Progress -->
<div id="extraction-progress" class="extraction-progress" style="display: none;">
    <div class="extraction-progress-bar">
        <div class="extraction-progress-fill" id="extraction-fill" style="width: 0%"></div>
    </div>
    <div class="extraction-stats">
        <span id="extraction-current">0 / 0 analysiert</span>
        <span id="extraction-time">~0 Sek verbleibend</span>
    </div>
</div>
"""

    @staticmethod
    def get_js() -> str:
        """Return the client-side script for local header extraction.

        The script loads Transformers.js and Tesseract.js from a CDN, crops
        the top part of each uploaded image onto a canvas, runs OCR on it and
        parses class, subject, date and student name out of the recognised
        text with regular expressions. It exposes its entry points on
        ``window.LocalLLM``.

        Behaviour worth knowing when embedding it:

        * ``init`` resolves to ``false`` (instead of never resolving) for
          callers that were waiting on a load that ended in an error.
        * Image and script load failures are reported as ``Error`` objects,
          so they show up in ``errors`` / the status text with a message.
        * Object URLs created for uploaded images are revoked after loading.
        * ``extractHeaders`` and ``magicAnalyze`` accept any array-like of
          files (for example a ``FileList``).
        """
        # The literal below is NOT a raw string: every backslash that must
        # reach the browser (regex escapes, '\n') is therefore doubled.
        return """
// ============================================================
// LOCAL LLM - Transformers.js + ONNX Header Extraction
// ============================================================

// Konfiguration
const LOCAL_LLM_CONFIG = {
    // Transformers.js CDN
    transformersUrl: 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.1',
    // Modell fuer Header-Extraktion (Vision + Text)
    // Option 1: Florence-2 (Microsoft) - gut fuer OCR + Strukturerkennung
    // Option 2: PaddleOCR via ONNX - spezialisiert auf Handschrift
    // Option 3: Custom fine-tuned Modell
    modelId: 'Xenova/florence-2-base', // Fallback: Xenova/vit-gpt2-image-captioning
    // Alternative: Lokaler OCR-basierter Ansatz
    useOcrFallback: true,
    // Cache
    cacheName: 'breakpilot-llm-v1',
    // Performance
    maxParallelExtractions: 4,
    headerRegionPercent: 0.20 // Top 20% der Seite
};

// State
let localLLMState = {
    isLoading: false,
    isReady: false,
    error: null,
    pipeline: null,
    ocrWorker: null
};

// ============================================================
// INITIALIZATION
// ============================================================

/**
 * Initialisiert das lokale LLM (Transformers.js).
 * Wird beim ersten Magic-Onboarding-Start aufgerufen.
 */
async function initLocalLLM() {
    if (localLLMState.isReady) return true;
    if (localLLMState.isLoading) {
        // Warten bis geladen
        return new Promise((resolve) => {
            const checkReady = setInterval(() => {
                if (localLLMState.isReady) {
                    clearInterval(checkReady);
                    resolve(true);
                } else if (!localLLMState.isLoading) {
                    // The in-flight load ended with an error: stop polling
                    // instead of leaving this caller pending forever.
                    clearInterval(checkReady);
                    resolve(false);
                }
            }, 100);
        });
    }
    localLLMState.isLoading = true;
    localLLMState.error = null;
    updateLLMStatus('loading', 'KI-Modell wird geladen...');
    try {
        // Transformers.js dynamisch laden
        if (!window.Transformers) {
            await loadScript(LOCAL_LLM_CONFIG.transformersUrl);
        }
        // OCR Worker initialisieren (Tesseract.js als Fallback)
        if (LOCAL_LLM_CONFIG.useOcrFallback) {
            await initOCRWorker();
        }
        localLLMState.isReady = true;
        localLLMState.isLoading = false;
        updateLLMStatus('ready', 'KI bereit');
        console.log('[LocalLLM] Initialisierung abgeschlossen');
        return true;
    } catch (error) {
        console.error('[LocalLLM] Fehler bei Initialisierung:', error);
        localLLMState.error = error;
        localLLMState.isLoading = false;
        updateLLMStatus('error', 'KI-Fehler: ' + ((error && error.message) || String(error)));
        return false;
    }
}

/**
 * Initialisiert Tesseract.js als OCR-Fallback.
 */
async function initOCRWorker() {
    // Tesseract.js fuer deutsche Handschrifterkennung
    if (!window.Tesseract) {
        await loadScript('https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js');
    }
    localLLMState.ocrWorker = await Tesseract.createWorker('deu', 1, {
        logger: m => {
            if (m.status === 'recognizing text') {
                updateExtractionProgress(m.progress * 100);
            }
        }
    });
    console.log('[LocalLLM] OCR Worker initialisiert (Deutsch)');
}

// ============================================================
// HEADER EXTRACTION
// ============================================================

/**
 * Extrahiert Header-Daten aus mehreren Bildern.
 * @param {File[]|FileList} files - Array von Bild-Dateien
 * @returns {Promise<ExtractionResult>}
 */
async function extractExamHeaders(files) {
    if (!localLLMState.isReady) {
        await initLocalLLM();
    }
    // Accept any array-like (e.g. a FileList, which has no slice()).
    const fileList = Array.from(files || []);
    const startTime = Date.now();
    showExtractionProgress(true);
    const results = {
        students: [],
        detectedClass: null,
        detectedSubject: null,
        detectedDate: null,
        classConfidence: 0,
        subjectConfidence: 0,
        errors: []
    };
    // Parallele Verarbeitung mit Limit
    const batchSize = Math.max(1, LOCAL_LLM_CONFIG.maxParallelExtractions);
    for (let i = 0; i < fileList.length; i += batchSize) {
        const batch = fileList.slice(i, Math.min(i + batchSize, fileList.length));
        const batchResults = await Promise.all(
            batch.map(file => extractSingleHeader(file))
        );
        // Ergebnisse aggregieren
        for (const result of batchResults) {
            if (result.error) {
                results.errors.push(result.error);
                continue;
            }
            if (result.studentName) {
                results.students.push({
                    firstName: result.firstName || result.studentName,
                    lastNameHint: result.lastNameHint,
                    fullName: result.studentName,
                    confidence: result.nameConfidence
                });
            }
            // Klasse/Fach/Datum aggregieren (hoechste Konfidenz gewinnt)
            if (result.className && result.classConfidence > results.classConfidence) {
                results.detectedClass = result.className;
                results.classConfidence = result.classConfidence;
            }
            if (result.subject && result.subjectConfidence > results.subjectConfidence) {
                results.detectedSubject = result.subject;
                results.subjectConfidence = result.subjectConfidence;
            }
            if (result.date && !results.detectedDate) {
                results.detectedDate = result.date;
            }
        }
        // Progress Update
        const processed = i + batch.length;
        const progress = Math.min(100, (processed / fileList.length) * 100);
        const elapsed = (Date.now() - startTime) / 1000;
        const remaining = (elapsed / processed) * (fileList.length - processed);
        updateExtractionProgress(progress, processed, fileList.length, remaining);
    }
    showExtractionProgress(false);
    return results;
}

/**
 * Extrahiert Header-Daten aus einem einzelnen Bild.
 */
async function extractSingleHeader(file) {
    try {
        // 1. Bild laden
        const img = await loadImageFromFile(file);
        // 2. Header-Region extrahieren (Top 20%)
        const headerCanvas = extractHeaderRegion(img);
        // 3. OCR auf Header-Region
        const ocrResult = await performOCR(headerCanvas);
        // 4. Strukturierte Daten extrahieren
        const parsed = parseHeaderText(ocrResult.text);
        return {
            ...parsed,
            nameConfidence: ocrResult.confidence,
            rawText: ocrResult.text
        };
    } catch (error) {
        console.error('[LocalLLM] Fehler bei Extraktion:', error);
        // Always return a truthy error text so the caller counts the failure.
        return { error: (error && error.message) || String(error) };
    }
}

/**
 * Laedt ein Bild aus einer File.
 */
function loadImageFromFile(file) {
    return new Promise((resolve, reject) => {
        const objectUrl = URL.createObjectURL(file);
        const img = new Image();
        img.onload = () => {
            // The decoded image no longer needs the blob URL.
            URL.revokeObjectURL(objectUrl);
            resolve(img);
        };
        img.onerror = () => {
            URL.revokeObjectURL(objectUrl);
            reject(new Error('Bild konnte nicht geladen werden: ' + (file && file.name)));
        };
        img.src = objectUrl;
    });
}

/**
 * Extrahiert die Header-Region (Top X%) aus einem Bild.
 */
function extractHeaderRegion(img) {
    const canvas = document.createElement('canvas');
    const ctx = canvas.getContext('2d');
    // At least one pixel row, otherwise tiny images yield an empty canvas.
    const headerHeight = Math.max(1, Math.floor(img.height * LOCAL_LLM_CONFIG.headerRegionPercent));
    canvas.width = img.width;
    canvas.height = headerHeight;
    ctx.drawImage(img, 0, 0, img.width, headerHeight, 0, 0, img.width, headerHeight);
    return canvas;
}

/**
 * Fuehrt OCR auf einem Canvas aus.
 */
async function performOCR(canvas) {
    if (!localLLMState.ocrWorker) {
        throw new Error('OCR Worker nicht initialisiert');
    }
    const { data } = await localLLMState.ocrWorker.recognize(canvas);
    return {
        text: data.text,
        confidence: data.confidence / 100,
        words: data.words
    };
}

/**
 * Parst OCR-Text und extrahiert strukturierte Daten.
 */
function parseHeaderText(text) {
    const result = {
        studentName: null,
        firstName: null,
        lastNameHint: null,
        className: null,
        classConfidence: 0,
        subject: null,
        subjectConfidence: 0,
        date: null
    };
    const subjects = {
        'mathe': 'Mathematik',
        'mathematik': 'Mathematik',
        'deutsch': 'Deutsch',
        'englisch': 'Englisch',
        'english': 'Englisch',
        'physik': 'Physik',
        'chemie': 'Chemie',
        'biologie': 'Biologie',
        'bio': 'Biologie',
        'geschichte': 'Geschichte',
        'erdkunde': 'Erdkunde',
        'geographie': 'Geographie',
        'politik': 'Politik',
        'kunst': 'Kunst',
        'musik': 'Musik',
        'sport': 'Sport',
        'religion': 'Religion',
        'ethik': 'Ethik',
        'informatik': 'Informatik',
        'latein': 'Latein',
        'franzoesisch': 'Franzoesisch',
        'französisch': 'Franzoesisch',
        'spanisch': 'Spanisch'
    };
    const datePatterns = [
        /\\b(\\d{1,2})\\.(\\d{1,2})\\.(\\d{2,4})\\b/, // 12.01.2026
        /\\b(\\d{1,2})\\s+(Januar|Februar|M(?:ae|ä)rz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)\\s+(\\d{4})\\b/i
    ];
    // True once the class came from a line with an explicit "Klasse" label.
    let classIsExplicit = false;
    const lines = (text || '').split('\\n').map(l => l.trim()).filter(l => l);
    for (const [lineIndex, line] of lines.entries()) {
        const lowerLine = line.toLowerCase();

        // Datum erkennen (verschiedene Formate)
        let dateText = null;
        for (const pattern of datePatterns) {
            const dateMatch = line.match(pattern);
            if (dateMatch) {
                dateText = dateMatch[0];
                break;
            }
        }
        if (dateText) {
            result.date = dateText;
        }

        // Klassenname erkennen (z.B. "3a", "Klasse 10b", "Q1")
        // The date is blanked out first so its digits are not read as a class.
        const classSource = dateText ? line.replace(dateText, ' ') : line;
        const classMatch = classSource.match(/\\b(Klasse\\s*)?(\\d{1,2}[a-zA-Z]?|Q[12]|E[1-2]|EF|[KG]\\d)\\b/i);
        if (classMatch) {
            const isExplicit = Boolean(classMatch[1]);
            // Keep the first hit; only an explicit "Klasse ..." may replace a bare token.
            if (!result.className || (isExplicit && !classIsExplicit)) {
                result.className = classMatch[2] || classMatch[0];
                result.classConfidence = 0.9;
                classIsExplicit = isExplicit;
            }
        }

        // Fach erkennen
        for (const [key, value] of Object.entries(subjects)) {
            if (lowerLine.includes(key)) {
                result.subject = value;
                result.subjectConfidence = 0.95;
                break;
            }
        }

        // Name erkennen (erste Zeile mit "Name:" oder alleinstehender Name)
        if (lowerLine.includes('name')) {
            const nameMatch = line.match(/name[:\\s]+(.+)/i);
            if (nameMatch) {
                result.studentName = nameMatch[1].trim();
                parseStudentName(result);
            }
        } else if (!result.studentName && lineIndex < 3) {
            // Erste Zeilen koennen Namen sein
            const potentialName = line.replace(/[^a-zA-ZäöüÄÖÜß\\s.-]/g, '').trim();
            if (potentialName.length >= 2 && potentialName.length <= 40) {
                // Pruefen ob es wie ein Name aussieht (Gross-/Kleinschreibung)
                if (/^[A-ZÄÖÜ][a-zäöüß]+/.test(potentialName)) {
                    result.studentName = potentialName;
                    parseStudentName(result);
                }
            }
        }
    }
    return result;
}

/**
 * Parst einen Schuelernamen in Vor- und Nachname.
 */
function parseStudentName(result) {
    if (!result.studentName) return;
    const name = result.studentName.trim();
    const parts = name.split(/\\s+/);
    if (parts.length === 1) {
        // Nur Vorname
        result.firstName = parts[0];
    } else if (parts.length === 2) {
        // Vorname Nachname oder Nachname, Vorname
        if (parts[0].endsWith(',')) {
            result.firstName = parts[1];
            result.lastNameHint = parts[0].replace(',', '');
        } else {
            result.firstName = parts[0];
            result.lastNameHint = parts[1];
        }
    } else {
        // Mehrere Teile - erster ist Vorname
        result.firstName = parts[0];
        result.lastNameHint = parts.slice(1).join(' ');
    }
    // Abkuerzungen erkennen (z.B. "M." fuer Nachname)
    if (result.lastNameHint && result.lastNameHint.match(/^[A-Z]\\.?$/)) {
        result.lastNameHint = result.lastNameHint.replace('.', '') + '.';
    }
}

// ============================================================
// UI HELPERS
// ============================================================

/**
 * Aktualisiert den LLM-Status-Indikator.
 */
function updateLLMStatus(state, text) {
    const statusEl = document.getElementById('local-llm-status');
    if (!statusEl) return;
    statusEl.style.display = 'flex';
    statusEl.className = 'local-llm-status ' + state;
    const textEl = statusEl.querySelector('.llm-status-text');
    if (textEl) textEl.textContent = text;
}

/**
 * Zeigt das Instant-Feedback-Panel.
 */
function showInstantFeedback(data) {
    const feedbackEl = document.getElementById('instant-feedback');
    if (!feedbackEl) return;
    feedbackEl.style.display = 'block';
    const setText = (id, value) => {
        const el = document.getElementById(id);
        if (el) el.textContent = value;
    };
    setText('detected-class', data.detectedClass || '-');
    setText('detected-subject', data.detectedSubject || '-');
    setText('detected-count', data.studentCount || '0');
    setText('detected-date', data.detectedDate || '-');
    // Konfidenz anzeigen
    // An empty label replaces a stale percentage from a previous analysis.
    setText('detected-class-conf',
        data.classConfidence ? Math.round(data.classConfidence * 100) + '% sicher' : '');
    setText('detected-subject-conf',
        data.subjectConfidence ? Math.round(data.subjectConfidence * 100) + '% sicher' : '');
}

/**
 * Zeigt/versteckt den Extraktions-Fortschritt.
 */
function showExtractionProgress(show) {
    const progressEl = document.getElementById('extraction-progress');
    if (progressEl) {
        progressEl.style.display = show ? 'flex' : 'none';
    }
}

/**
 * Aktualisiert den Extraktions-Fortschritt.
 */
function updateExtractionProgress(percent, current, total, remainingSec) {
    const fillEl = document.getElementById('extraction-fill');
    const currentEl = document.getElementById('extraction-current');
    const timeEl = document.getElementById('extraction-time');
    if (fillEl) fillEl.style.width = percent + '%';
    if (currentEl && current !== undefined) {
        currentEl.textContent = current + ' / ' + total + ' analysiert';
    }
    if (timeEl && remainingSec !== undefined) {
        timeEl.textContent = '~' + Math.ceil(remainingSec) + ' Sek verbleibend';
    }
}

/**
 * Laedt ein Script dynamisch.
 */
function loadScript(url) {
    return new Promise((resolve, reject) => {
        const script = document.createElement('script');
        script.src = url;
        script.onload = resolve;
        // Reject with an Error (not the DOM event) so callers get a message.
        script.onerror = () => reject(new Error('Script konnte nicht geladen werden: ' + url));
        document.head.appendChild(script);
    });
}

// ============================================================
// MAGIC ONBOARDING INTEGRATION
// ============================================================

/**
 * Hauptfunktion fuer Magic-Analyse beim Upload.
 * Wird von klausur_korrektur.py aufgerufen.
 */
async function magicAnalyzeExams(files) {
    // Accept any array-like (e.g. a FileList, which has no slice()).
    const allFiles = Array.from(files || []);
    console.log('[MagicOnboarding] Starte Analyse von', allFiles.length, 'Dateien');
    // 1. LLM initialisieren
    await initLocalLLM();
    // 2. Quick Preview (erste 5 Dateien)
    const quickFiles = allFiles.slice(0, 5);
    const quickResults = await extractExamHeaders(quickFiles);
    // 3. Sofort Feedback zeigen (WOW-Effekt!)
    showInstantFeedback({
        detectedClass: quickResults.detectedClass,
        detectedSubject: quickResults.detectedSubject,
        studentCount: allFiles.length,
        detectedDate: quickResults.detectedDate,
        classConfidence: quickResults.classConfidence,
        subjectConfidence: quickResults.subjectConfidence
    });
    // 4. Rest im Hintergrund verarbeiten
    if (allFiles.length > 5) {
        const remainingFiles = allFiles.slice(5);
        const remainingResults = await extractExamHeaders(remainingFiles);
        // Ergebnisse mergen
        quickResults.students = [
            ...quickResults.students,
            ...remainingResults.students
        ];
        quickResults.errors = [
            ...quickResults.errors,
            ...remainingResults.errors
        ];
        // Same rule as inside extractExamHeaders: higher confidence wins.
        if (remainingResults.detectedClass &&
                remainingResults.classConfidence > quickResults.classConfidence) {
            quickResults.detectedClass = remainingResults.detectedClass;
            quickResults.classConfidence = remainingResults.classConfidence;
        }
        if (remainingResults.detectedSubject &&
                remainingResults.subjectConfidence > quickResults.subjectConfidence) {
            quickResults.detectedSubject = remainingResults.detectedSubject;
            quickResults.subjectConfidence = remainingResults.subjectConfidence;
        }
        if (!quickResults.detectedDate && remainingResults.detectedDate) {
            quickResults.detectedDate = remainingResults.detectedDate;
        }
    }
    console.log('[MagicOnboarding] Analyse abgeschlossen:', quickResults);
    return quickResults;
}

// Export fuer globalen Zugriff
window.LocalLLM = {
    init: initLocalLLM,
    extractHeaders: extractExamHeaders,
    magicAnalyze: magicAnalyzeExams,
    getStatus: () => localLLMState
};
"""