Files
breakpilot-lehrer/klausur-service/backend/vocab_session_store.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

429 lines
13 KiB
Python

"""
Vocabulary Session Store - PostgreSQL persistence for vocab extraction sessions.
Replaces in-memory storage with database persistence.
See migrations/001_vocab_sessions.sql for schema.
"""
import os
import uuid
import logging
import json
from typing import Optional, List, Dict, Any
from datetime import datetime
import asyncpg
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Database configuration: the DATABASE_URL environment variable wins;
# the literal below is only the fallback connection string.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://breakpilot:breakpilot@postgres:5432/breakpilot_db",
)
# Connection pool (created on the first call to get_pool)
_pool: Optional[asyncpg.Pool] = None


async def get_pool() -> asyncpg.Pool:
    """Return the shared asyncpg pool, creating it on first use.

    The pool is built from ``DATABASE_URL`` with ``min_size=2`` and
    ``max_size=10`` and kept in the module-level ``_pool`` so later calls
    reuse it.
    """
    global _pool
    if _pool is not None:
        return _pool
    _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
    return _pool
async def init_vocab_tables():
    """Create the vocab tables if ``vocab_sessions`` does not exist yet.

    Meant to be called at startup. The function looks up ``vocab_sessions``
    in ``information_schema.tables``; when it is absent, the SQL in
    ``migrations/001_vocab_sessions.sql`` (resolved relative to this module)
    is read and executed on the acquired connection. A missing migration
    file is logged as a warning and nothing is executed.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        # Check if tables exist
        tables_exist = await conn.fetchval("""
            SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = 'vocab_sessions'
            )
        """)
        if tables_exist:
            logger.debug("Vocab tables already exist")
            return

        logger.info("Creating vocab tables...")
        migration_path = os.path.join(
            os.path.dirname(__file__),
            "migrations/001_vocab_sessions.sql"
        )
        if not os.path.exists(migration_path):
            logger.warning(f"Migration file not found: {migration_path}")
            return

        # Decode explicitly as UTF-8 so the result does not depend on the
        # host locale (assumes the migration file is UTF-8 -- confirm).
        with open(migration_path, "r", encoding="utf-8") as f:
            sql = f.read()
        await conn.execute(sql)
        logger.info("Vocab tables created successfully")
# =============================================================================
# SESSION OPERATIONS
# =============================================================================
async def create_session_db(
    session_id: str,
    name: str,
    description: str = "",
    source_language: str = "en",
    target_language: str = "de"
) -> Dict[str, Any]:
    """Insert a new vocabulary session row and return it.

    The row is created with status ``'pending'`` and a ``vocabulary_count``
    of 0. ``session_id`` must be a UUID string; it is parsed with
    ``uuid.UUID``. The inserted row is returned after conversion by
    ``_row_to_dict``.
    """
    insert_sql = """
        INSERT INTO vocab_sessions (
            id, name, description, source_language, target_language,
            status, vocabulary_count
        ) VALUES ($1, $2, $3, $4, $5, 'pending', 0)
        RETURNING *
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        session_uuid = uuid.UUID(session_id)
        record = await conn.fetchrow(
            insert_sql,
            session_uuid,
            name,
            description,
            source_language,
            target_language,
        )
    return _row_to_dict(record)
async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single session row by its ID.

    Returns the row converted by ``_row_to_dict``, or ``None`` when no row
    matches. ``session_id`` is parsed with ``uuid.UUID``.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        session_uuid = uuid.UUID(session_id)
        record = await conn.fetchrow("""
            SELECT * FROM vocab_sessions WHERE id = $1
        """, session_uuid)
    return _row_to_dict(record) if record else None
async def list_sessions_db(
    limit: int = 50,
    offset: int = 0,
    status: Optional[str] = None
) -> List[Dict[str, Any]]:
    """Return sessions ordered by ``created_at`` descending.

    A truthy ``status`` restricts the result to sessions with exactly that
    status; ``limit`` and ``offset`` page through the result set. Each row
    is converted by ``_row_to_dict``.
    """
    pool = await get_pool()

    # Pick the statement and its positional parameters first, then run a
    # single fetch.
    if status:
        query = """
            SELECT * FROM vocab_sessions
            WHERE status = $1
            ORDER BY created_at DESC
            LIMIT $2 OFFSET $3
        """
        params = (status, limit, offset)
    else:
        query = """
            SELECT * FROM vocab_sessions
            ORDER BY created_at DESC
            LIMIT $1 OFFSET $2
        """
        params = (limit, offset)

    async with pool.acquire() as conn:
        records = await conn.fetch(query, *params)
    return [_row_to_dict(record) for record in records]
async def update_session_db(
    session_id: str,
    **kwargs
) -> Optional[Dict[str, Any]]:
    """Update selected columns of a session and return the updated row.

    Only keyword arguments whose name is in the allow-list below are
    written; any other keyword is silently ignored. Column names in the
    generated SQL therefore always come from the allow-list, and all values
    are passed as positional query parameters.

    Values for the JSON columns (``ocr_prompts``, ``processed_pages``,
    ``successful_pages``, ``failed_pages``) are serialised with
    ``json.dumps``. Only ``None`` is stored as NULL; empty containers such
    as ``[]`` or ``{}`` are stored as empty JSON values.

    Returns:
        The updated row as a dict, the current row (via ``get_session_db``)
        when no allowed field was supplied, or ``None`` when no row matches
        ``session_id``.
    """
    pool = await get_pool()

    allowed_fields = [
        'name', 'description', 'status', 'vocabulary_count',
        'extraction_confidence', 'image_path', 'pdf_path', 'pdf_page_count',
        'ocr_prompts', 'processed_pages', 'successful_pages', 'failed_pages'
    ]
    json_fields = ('ocr_prompts', 'processed_pages', 'successful_pages', 'failed_pages')

    # Build dynamic UPDATE query
    fields = []
    values = []
    for key, value in kwargs.items():
        if key not in allowed_fields:
            continue
        # Test against None rather than truthiness, so that falsy but valid
        # JSON values ([], {}, 0) are not silently replaced by NULL.
        if key in json_fields and value is not None:
            value = json.dumps(value)
        values.append(value)
        # The placeholder number is the 1-based position of the value.
        fields.append(f"{key} = ${len(values)}")

    if not fields:
        return await get_session_db(session_id)

    # The session ID is always the last positional parameter.
    values.append(uuid.UUID(session_id))
    async with pool.acquire() as conn:
        row = await conn.fetchrow(f"""
            UPDATE vocab_sessions
            SET {', '.join(fields)}
            WHERE id = ${len(values)}
            RETURNING *
        """, *values)
    if row:
        return _row_to_dict(row)
    return None
async def delete_session_db(session_id: str) -> bool:
    """Delete the session row with the given ID.

    Returns ``True`` only when the command status reported by the database
    is exactly ``"DELETE 1"``. Removal of related rows is expected to happen
    through cascading foreign keys in the schema (not visible in this
    module -- confirm against the migration).
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        session_uuid = uuid.UUID(session_id)
        status_tag = await conn.execute("""
            DELETE FROM vocab_sessions WHERE id = $1
        """, session_uuid)
    return status_tag == "DELETE 1"
# =============================================================================
# VOCABULARY OPERATIONS
# =============================================================================
async def add_vocabulary_db(
    session_id: str,
    vocab_list: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Insert vocabulary entries for a session and refresh its entry count.

    Each dict in ``vocab_list`` may carry the keys ``english``, ``german``,
    ``example_sentence``, ``example_sentence_gap``, ``word_type`` and
    ``source_page``. Missing ``english``/``german`` default to ``''``; the
    other keys default to ``None``. Every entry gets a freshly generated
    UUID.

    All inserts and the ``vocabulary_count`` refresh run in one transaction,
    so a failure part-way through leaves neither partial rows nor a stale
    count behind.

    Returns:
        The inserted rows (converted by ``_row_to_dict``) in input order, or
        ``[]`` when ``vocab_list`` is empty, in which case the database is
        not touched.
    """
    if not vocab_list:
        return []

    pool = await get_pool()
    results = []
    async with pool.acquire() as conn:
        # Parse the session ID once instead of once per entry.
        session_uuid = uuid.UUID(session_id)
        async with conn.transaction():
            for vocab in vocab_list:
                row = await conn.fetchrow(
                    """
                    INSERT INTO vocab_entries (
                        id, session_id, english, german, example_sentence,
                        example_sentence_gap, word_type, source_page
                    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
                    RETURNING *
                    """,
                    uuid.uuid4(),
                    session_uuid,
                    vocab.get('english', ''),
                    vocab.get('german', ''),
                    vocab.get('example_sentence'),
                    vocab.get('example_sentence_gap'),
                    vocab.get('word_type'),
                    vocab.get('source_page')
                )
                results.append(_row_to_dict(row))

            # Update vocabulary count
            await conn.execute("""
                UPDATE vocab_sessions
                SET vocabulary_count = (
                    SELECT COUNT(*) FROM vocab_entries WHERE session_id = $1
                )
                WHERE id = $1
            """, session_uuid)
    return results
async def get_vocabulary_db(
    session_id: str,
    source_page: Optional[int] = None
) -> List[Dict[str, Any]]:
    """Return the vocabulary entries of a session.

    Without ``source_page`` all entries are returned, ordered by
    ``source_page`` (NULLs last) and then ``created_at``. With a
    ``source_page`` (including 0) only that page's entries are returned,
    ordered by ``created_at``.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        if source_page is None:
            records = await conn.fetch("""
                SELECT * FROM vocab_entries
                WHERE session_id = $1
                ORDER BY source_page NULLS LAST, created_at
            """, uuid.UUID(session_id))
        else:
            records = await conn.fetch("""
                SELECT * FROM vocab_entries
                WHERE session_id = $1 AND source_page = $2
                ORDER BY created_at
            """, uuid.UUID(session_id), source_page)
    return [_row_to_dict(record) for record in records]
async def update_vocabulary_db(
    entry_id: str,
    **kwargs
) -> Optional[Dict[str, Any]]:
    """Update selected columns of one vocabulary entry.

    Only keyword arguments named in the allow-list below are written; others
    are ignored. Returns the updated row as a dict, or ``None`` when no
    allowed field was supplied or no row matches ``entry_id``.
    """
    pool = await get_pool()

    allowed_fields = [
        'english', 'german', 'example_sentence', 'example_sentence_gap',
        'word_type', 'source_page'
    ]
    updates = {
        column: new_value
        for column, new_value in kwargs.items()
        if column in allowed_fields
    }
    if not updates:
        return None

    # Placeholders are numbered 1..n for the SET clause; the entry ID takes
    # the next number.
    assignments = [
        f"{column} = ${position}"
        for position, column in enumerate(updates, start=1)
    ]
    params = [*updates.values(), uuid.UUID(entry_id)]
    id_position = len(params)

    async with pool.acquire() as conn:
        record = await conn.fetchrow(f"""
            UPDATE vocab_entries
            SET {', '.join(assignments)}
            WHERE id = ${id_position}
            RETURNING *
        """, *params)
    return _row_to_dict(record) if record else None
async def clear_page_vocabulary_db(session_id: str, page: int) -> int:
    """Delete a session's vocabulary entries for one page.

    After the delete, the session's ``vocabulary_count`` is recomputed from
    the remaining entries. The return value is the number parsed from the
    last token of the DELETE command status (0 if the status is empty).
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        delete_status = await conn.execute("""
            DELETE FROM vocab_entries
            WHERE session_id = $1 AND source_page = $2
        """, uuid.UUID(session_id), page)

        # Update vocabulary count
        await conn.execute("""
            UPDATE vocab_sessions
            SET vocabulary_count = (
                SELECT COUNT(*) FROM vocab_entries WHERE session_id = $1
            )
            WHERE id = $1
        """, uuid.UUID(session_id))

    # The status looks like "DELETE <n>"; the last token is the row count.
    if not delete_status:
        return 0
    return int(delete_status.split()[-1])
# =============================================================================
# WORKSHEET OPERATIONS
# =============================================================================
async def create_worksheet_db(
    session_id: str,
    worksheet_types: List[str],
    pdf_path: Optional[str] = None,
    solution_path: Optional[str] = None
) -> Dict[str, Any]:
    """Insert a worksheet row for a session and return it.

    A new UUID is generated for the worksheet. ``worksheet_types`` is stored
    as a JSON-encoded string; ``pdf_path`` and ``solution_path`` are stored
    as given. The inserted row is returned after conversion by
    ``_row_to_dict``.
    """
    pool = await get_pool()
    worksheet_uuid = uuid.uuid4()
    insert_sql = """
        INSERT INTO vocab_worksheets (
            id, session_id, worksheet_types, pdf_path, solution_path
        ) VALUES ($1, $2, $3, $4, $5)
        RETURNING *
    """
    async with pool.acquire() as conn:
        session_uuid = uuid.UUID(session_id)
        types_json = json.dumps(worksheet_types)
        record = await conn.fetchrow(
            insert_sql,
            worksheet_uuid,
            session_uuid,
            types_json,
            pdf_path,
            solution_path,
        )
    return _row_to_dict(record)
async def get_worksheet_db(worksheet_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single worksheet row by its ID.

    Returns the row converted by ``_row_to_dict``, or ``None`` when no row
    matches. ``worksheet_id`` is parsed with ``uuid.UUID``.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        worksheet_uuid = uuid.UUID(worksheet_id)
        record = await conn.fetchrow("""
            SELECT * FROM vocab_worksheets WHERE id = $1
        """, worksheet_uuid)
    return _row_to_dict(record) if record else None
async def delete_worksheets_for_session_db(session_id: str) -> int:
    """Delete every worksheet row that belongs to a session.

    Returns the number parsed from the last token of the DELETE command
    status (0 if the status is empty).
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        session_uuid = uuid.UUID(session_id)
        delete_status = await conn.execute("""
            DELETE FROM vocab_worksheets WHERE session_id = $1
        """, session_uuid)

    # The status looks like "DELETE <n>"; the last token is the row count.
    if not delete_status:
        return 0
    return int(delete_status.split()[-1])
# =============================================================================
# PDF CACHE OPERATIONS
# =============================================================================
# Simple in-memory cache for PDF data (temporary until served).
# Keys are worksheet IDs; entries stay until explicitly cleared.
_pdf_cache: Dict[str, bytes] = {}


def cache_pdf_data(worksheet_id: str, pdf_data: bytes) -> None:
    """Store ``pdf_data`` under ``worksheet_id``, replacing any earlier entry."""
    _pdf_cache.update({worksheet_id: pdf_data})


def get_cached_pdf_data(worksheet_id: str) -> Optional[bytes]:
    """Return the bytes cached for ``worksheet_id``, or ``None`` if absent."""
    try:
        return _pdf_cache[worksheet_id]
    except KeyError:
        return None


def clear_cached_pdf_data(worksheet_id: str) -> None:
    """Remove the entry for ``worksheet_id``; a missing entry is ignored."""
    if worksheet_id in _pdf_cache:
        del _pdf_cache[worksheet_id]
# =============================================================================
# HELPER FUNCTIONS
# =============================================================================
def _row_to_dict(row: "asyncpg.Record") -> Dict[str, Any]:
    """Convert a database row into a plain dict with JSON-friendly values.

    ``None`` yields ``{}``. Otherwise the row is copied with ``dict(row)``
    and, for keys that are present and not ``None``:

    * ``id`` and ``session_id`` are converted with ``str``;
    * ``created_at``, ``updated_at`` and ``generated_at`` are converted with
      ``.isoformat()``;
    * ``ocr_prompts``, ``processed_pages``, ``successful_pages``,
      ``failed_pages`` and ``worksheet_types`` are parsed with
      ``json.loads`` when the stored value is a string.

    All other keys are passed through unchanged.
    """
    if row is None:
        return {}

    data = dict(row)

    def _convert(keys, converter):
        # Apply ``converter`` to every listed key that holds a non-None value.
        for key in keys:
            current = data.get(key)
            if current is not None:
                data[key] = converter(current)

    def _parse_json(value):
        # Only string values are decoded; anything else is kept as-is.
        return json.loads(value) if isinstance(value, str) else value

    # Convert UUIDs to strings
    _convert(['id', 'session_id'], str)
    # Convert datetimes to ISO strings
    _convert(['created_at', 'updated_at', 'generated_at'], lambda ts: ts.isoformat())
    # Parse JSONB fields back to dicts/lists
    _convert(
        ['ocr_prompts', 'processed_pages', 'successful_pages', 'failed_pages', 'worksheet_types'],
        _parse_json,
    )
    return data