Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
429 lines
13 KiB
Python
429 lines
13 KiB
Python
"""
|
|
Vocabulary Session Store - PostgreSQL persistence for vocab extraction sessions.
|
|
|
|
Replaces in-memory storage with database persistence.
|
|
See migrations/001_vocab_sessions.sql for schema.
|
|
"""
|
|
|
|
import os
|
|
import uuid
|
|
import logging
|
|
import json
|
|
from typing import Optional, List, Dict, Any
|
|
from datetime import datetime
|
|
|
|
import asyncpg
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# PostgreSQL connection string. The DATABASE_URL environment variable takes
# precedence; the literal below is only the fallback default.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://breakpilot:breakpilot@postgres:5432/breakpilot_db",
)
|
|
|
|
# Connection pool (initialized lazily)
|
|
# Shared pool instance; stays None until get_pool() creates it on first call.
_pool: Optional[asyncpg.Pool] = None
|
|
|
|
|
|
async def get_pool() -> asyncpg.Pool:
    """Return the shared asyncpg pool, building it on the first call.

    The pool is kept in the module-level ``_pool`` variable and is created
    from ``DATABASE_URL`` with ``min_size=2`` and ``max_size=10``.
    """
    global _pool

    # Fast path: the pool was already created by an earlier call.
    if _pool is not None:
        return _pool

    _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
    return _pool
|
|
|
|
|
|
async def init_vocab_tables():
    """Create the vocab tables from the bundled migration if they are missing.

    Intended to be called at startup. Looks for a table named
    ``vocab_sessions`` in ``information_schema.tables``; when it is absent,
    the SQL in ``migrations/001_vocab_sessions.sql`` (resolved relative to
    this module's directory) is executed. If the migration file cannot be
    found, a warning is logged and nothing else happens.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        tables_exist = await conn.fetchval("""
            SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = 'vocab_sessions'
            )
        """)

        if tables_exist:
            logger.debug("Vocab tables already exist")
            return

        logger.info("Creating vocab tables...")
        migration_path = os.path.join(
            os.path.dirname(__file__),
            "migrations/001_vocab_sessions.sql"
        )
        if not os.path.exists(migration_path):
            logger.warning("Migration file not found: %s", migration_path)
            return

        # Read with an explicit encoding so the result does not depend on the
        # platform's locale default (assumes the migration is UTF-8).
        with open(migration_path, "r", encoding="utf-8") as f:
            sql = f.read()

        await conn.execute(sql)
        logger.info("Vocab tables created successfully")
|
|
|
|
|
|
# =============================================================================
|
|
# SESSION OPERATIONS
|
|
# =============================================================================
|
|
|
|
async def create_session_db(
    session_id: str,
    name: str,
    description: str = "",
    source_language: str = "en",
    target_language: str = "de"
) -> Dict[str, Any]:
    """Insert a new row into ``vocab_sessions`` and return it as a dict.

    The row is created with status ``'pending'`` and a vocabulary count of 0.

    Args:
        session_id: UUID of the new session, as a string.
        name: Session name.
        description: Optional description text.
        source_language: Source language code (defaults to ``"en"``).
        target_language: Target language code (defaults to ``"de"``).

    Returns:
        The inserted row, converted by ``_row_to_dict``.

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    insert_sql = """
        INSERT INTO vocab_sessions (
            id, name, description, source_language, target_language,
            status, vocabulary_count
        ) VALUES ($1, $2, $3, $4, $5, 'pending', 0)
        RETURNING *
    """

    pool = await get_pool()
    async with pool.acquire() as conn:
        inserted = await conn.fetchrow(
            insert_sql,
            uuid.UUID(session_id),
            name,
            description,
            source_language,
            target_language,
        )

    return _row_to_dict(inserted)
|
|
|
|
|
|
async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single session by its UUID string.

    Returns:
        The matching ``vocab_sessions`` row converted by ``_row_to_dict``,
        or ``None`` when no row has that id.

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            """
            SELECT * FROM vocab_sessions WHERE id = $1
            """,
            uuid.UUID(session_id),
        )

    return _row_to_dict(record) if record else None
|
|
|
|
|
|
async def list_sessions_db(
    limit: int = 50,
    offset: int = 0,
    status: Optional[str] = None
) -> List[Dict[str, Any]]:
    """Return sessions ordered by ``created_at`` descending, one page at a time.

    Args:
        limit: Maximum number of rows to return.
        offset: Number of rows to skip.
        status: When truthy, only sessions with exactly this status are
            returned; ``None`` or an empty string disables the filter.

    Returns:
        A list of session dicts (possibly empty).
    """
    # Pick the statement and its positional arguments up front so that a
    # single fetch call serves both the filtered and unfiltered case.
    if status:
        query = """
            SELECT * FROM vocab_sessions
            WHERE status = $1
            ORDER BY created_at DESC
            LIMIT $2 OFFSET $3
        """
        args = (status, limit, offset)
    else:
        query = """
            SELECT * FROM vocab_sessions
            ORDER BY created_at DESC
            LIMIT $1 OFFSET $2
        """
        args = (limit, offset)

    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(query, *args)

    return list(map(_row_to_dict, records))
|
|
|
|
|
|
async def update_session_db(
    session_id: str,
    **kwargs
) -> Optional[Dict[str, Any]]:
    """Update selected columns of a session and return the updated row.

    Only keyword arguments whose names appear in the allow-list below are
    applied; any other keyword is silently ignored. Values for the JSONB
    columns (``ocr_prompts``, ``processed_pages``, ``successful_pages``,
    ``failed_pages``) are serialised with ``json.dumps``. Only ``None`` is
    stored as NULL for those columns; empty lists/dicts and ``0`` are stored
    as their JSON representation instead of being collapsed to NULL.

    Args:
        session_id: UUID of the session, as a string.
        **kwargs: Column name / new value pairs.

    Returns:
        The updated row as a dict, the current row when no allowed field was
        supplied, or ``None`` when the session does not exist.

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    pool = await get_pool()

    allowed_fields = (
        'name', 'description', 'status', 'vocabulary_count',
        'extraction_confidence', 'image_path', 'pdf_path', 'pdf_page_count',
        'ocr_prompts', 'processed_pages', 'successful_pages', 'failed_pages',
    )
    jsonb_fields = ('ocr_prompts', 'processed_pages', 'successful_pages', 'failed_pages')

    # Build the SET clause. Column names are interpolated into the SQL text,
    # which is safe only because they are restricted to the allow-list;
    # values always travel as bound parameters.
    assignments = []
    values = []
    for key, value in kwargs.items():
        if key not in allowed_fields:
            continue
        if key in jsonb_fields and value is not None:
            # `is not None` (rather than truthiness) keeps [] / {} / 0 intact.
            value = json.dumps(value)
        values.append(value)
        assignments.append(f"{key} = ${len(values)}")

    if not assignments:
        # Nothing to change: report the current state of the session.
        return await get_session_db(session_id)

    # The session id is always the last positional parameter.
    values.append(uuid.UUID(session_id))

    async with pool.acquire() as conn:
        row = await conn.fetchrow(f"""
            UPDATE vocab_sessions
            SET {', '.join(assignments)}
            WHERE id = ${len(values)}
            RETURNING *
        """, *values)

    return _row_to_dict(row) if row else None
|
|
|
|
|
|
async def delete_session_db(session_id: str) -> bool:
    """Delete one session row by id.

    Returns:
        ``True`` when the command status reported by the database is exactly
        ``"DELETE 1"``, otherwise ``False``.

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    # NOTE(review): related rows are presumably removed via ON DELETE CASCADE
    # in the schema - confirm against the migration.
    pool = await get_pool()
    async with pool.acquire() as conn:
        status_tag = await conn.execute(
            """
            DELETE FROM vocab_sessions WHERE id = $1
            """,
            uuid.UUID(session_id),
        )

    return status_tag == "DELETE 1"
|
|
|
|
|
|
# =============================================================================
|
|
# VOCABULARY OPERATIONS
|
|
# =============================================================================
|
|
|
|
async def add_vocabulary_db(
    session_id: str,
    vocab_list: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Insert vocabulary entries for a session and refresh its entry count.

    All inserts and the ``vocabulary_count`` refresh run inside a single
    transaction, so a failure part-way through leaves neither partial rows
    nor a stale count behind.

    Args:
        session_id: UUID of the owning session, as a string.
        vocab_list: Entries to insert. Each dict may provide ``english``,
            ``german``, ``example_sentence``, ``example_sentence_gap``,
            ``word_type`` and ``source_page``; ``english`` and ``german``
            default to ``''``, the others to ``None``.

    Returns:
        The inserted rows as dicts, in input order. An empty ``vocab_list``
        returns ``[]`` without touching the database.

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    if not vocab_list:
        return []

    pool = await get_pool()
    results = []

    async with pool.acquire() as conn:
        # Parse once instead of once per entry.
        session_uuid = uuid.UUID(session_id)

        async with conn.transaction():
            for vocab in vocab_list:
                row = await conn.fetchrow(
                    """
                    INSERT INTO vocab_entries (
                        id, session_id, english, german, example_sentence,
                        example_sentence_gap, word_type, source_page
                    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
                    RETURNING *
                    """,
                    uuid.uuid4(),
                    session_uuid,
                    vocab.get('english', ''),
                    vocab.get('german', ''),
                    vocab.get('example_sentence'),
                    vocab.get('example_sentence_gap'),
                    vocab.get('word_type'),
                    vocab.get('source_page'),
                )
                results.append(_row_to_dict(row))

            # Recompute the count from the table rather than incrementing it,
            # so it always reflects the actual number of entries.
            await conn.execute(
                """
                UPDATE vocab_sessions
                SET vocabulary_count = (
                    SELECT COUNT(*) FROM vocab_entries WHERE session_id = $1
                )
                WHERE id = $1
                """,
                session_uuid,
            )

    return results
|
|
|
|
|
|
async def get_vocabulary_db(
    session_id: str,
    source_page: Optional[int] = None
) -> List[Dict[str, Any]]:
    """Return the vocabulary entries of a session, optionally for one page.

    Args:
        session_id: UUID of the session, as a string.
        source_page: When given (including ``0``), only entries with this
            ``source_page`` are returned, ordered by ``created_at``. When
            ``None``, all entries are returned ordered by ``source_page``
            (NULLs last) and then ``created_at``.

    Returns:
        A list of entry dicts (possibly empty).

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    all_pages_sql = """
        SELECT * FROM vocab_entries
        WHERE session_id = $1
        ORDER BY source_page NULLS LAST, created_at
    """
    single_page_sql = """
        SELECT * FROM vocab_entries
        WHERE session_id = $1 AND source_page = $2
        ORDER BY created_at
    """

    pool = await get_pool()
    async with pool.acquire() as conn:
        session_uuid = uuid.UUID(session_id)
        if source_page is None:
            records = await conn.fetch(all_pages_sql, session_uuid)
        else:
            records = await conn.fetch(single_page_sql, session_uuid, source_page)

    return list(map(_row_to_dict, records))
|
|
|
|
|
|
async def update_vocabulary_db(
    entry_id: str,
    **kwargs
) -> Optional[Dict[str, Any]]:
    """Update selected columns of one vocabulary entry.

    Only ``english``, ``german``, ``example_sentence``,
    ``example_sentence_gap``, ``word_type`` and ``source_page`` can be
    changed; any other keyword is silently ignored.

    When no updatable field is supplied the current row is returned
    unchanged (mirroring ``update_session_db``), so that ``None`` always
    means "no entry with this id" rather than "nothing to update".

    Args:
        entry_id: UUID of the vocabulary entry, as a string.
        **kwargs: Column name / new value pairs.

    Returns:
        The (updated or current) row as a dict, or ``None`` when the entry
        does not exist.

    Raises:
        ValueError: If ``entry_id`` is not a valid UUID string.
    """
    pool = await get_pool()

    allowed_fields = (
        'english', 'german', 'example_sentence', 'example_sentence_gap',
        'word_type', 'source_page',
    )

    # Column names come only from the allow-list, so interpolating them into
    # the SQL text is safe; values are always passed as bound parameters.
    assignments = []
    values = []
    for key, value in kwargs.items():
        if key in allowed_fields:
            values.append(value)
            assignments.append(f"{key} = ${len(values)}")

    entry_uuid = uuid.UUID(entry_id)

    async with pool.acquire() as conn:
        if not assignments:
            # Nothing to change: report the entry as it currently is.
            row = await conn.fetchrow(
                "SELECT * FROM vocab_entries WHERE id = $1",
                entry_uuid,
            )
        else:
            # The entry id is always the last positional parameter.
            values.append(entry_uuid)
            row = await conn.fetchrow(f"""
                UPDATE vocab_entries
                SET {', '.join(assignments)}
                WHERE id = ${len(values)}
                RETURNING *
            """, *values)

    return _row_to_dict(row) if row else None
|
|
|
|
|
|
async def clear_page_vocabulary_db(session_id: str, page: int) -> int:
    """Delete all vocabulary entries of one page and refresh the entry count.

    The delete and the ``vocabulary_count`` refresh run in one transaction so
    the stored count cannot drift from the table if the second statement
    fails.

    Args:
        session_id: UUID of the session, as a string.
        page: ``source_page`` value whose entries are removed.

    Returns:
        The number of deleted rows, parsed from the database command status
        (e.g. ``"DELETE 3"`` -> ``3``).

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        session_uuid = uuid.UUID(session_id)

        async with conn.transaction():
            result = await conn.execute(
                """
                DELETE FROM vocab_entries
                WHERE session_id = $1 AND source_page = $2
                """,
                session_uuid,
                page,
            )

            # Recompute the count from the remaining rows.
            await conn.execute(
                """
                UPDATE vocab_sessions
                SET vocabulary_count = (
                    SELECT COUNT(*) FROM vocab_entries WHERE session_id = $1
                )
                WHERE id = $1
                """,
                session_uuid,
            )

    # The status tag ends with the affected-row count.
    return int(result.split()[-1]) if result else 0
|
|
|
|
|
|
# =============================================================================
|
|
# WORKSHEET OPERATIONS
|
|
# =============================================================================
|
|
|
|
async def create_worksheet_db(
    session_id: str,
    worksheet_types: List[str],
    pdf_path: Optional[str] = None,
    solution_path: Optional[str] = None
) -> Dict[str, Any]:
    """Insert a worksheet row for a session and return it as a dict.

    A fresh random UUID is generated for the worksheet, and
    ``worksheet_types`` is stored JSON-encoded.

    Args:
        session_id: UUID of the owning session, as a string.
        worksheet_types: Worksheet type names to record.
        pdf_path: Optional path of the generated worksheet PDF.
        solution_path: Optional path of the generated solution PDF.

    Returns:
        The inserted row, converted by ``_row_to_dict``.

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    insert_sql = """
        INSERT INTO vocab_worksheets (
            id, session_id, worksheet_types, pdf_path, solution_path
        ) VALUES ($1, $2, $3, $4, $5)
        RETURNING *
    """

    pool = await get_pool()
    new_id = uuid.uuid4()

    async with pool.acquire() as conn:
        inserted = await conn.fetchrow(
            insert_sql,
            new_id,
            uuid.UUID(session_id),
            json.dumps(worksheet_types),
            pdf_path,
            solution_path,
        )

    return _row_to_dict(inserted)
|
|
|
|
|
|
async def get_worksheet_db(worksheet_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single worksheet by its UUID string.

    Returns:
        The matching ``vocab_worksheets`` row converted by ``_row_to_dict``,
        or ``None`` when no row has that id.

    Raises:
        ValueError: If ``worksheet_id`` is not a valid UUID string.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            """
            SELECT * FROM vocab_worksheets WHERE id = $1
            """,
            uuid.UUID(worksheet_id),
        )

    return _row_to_dict(record) if record else None
|
|
|
|
|
|
async def delete_worksheets_for_session_db(session_id: str) -> int:
    """Remove every worksheet row that belongs to the given session.

    Returns:
        The number of deleted rows, parsed from the database command status
        (the last whitespace-separated token); ``0`` if the status is empty.

    Raises:
        ValueError: If ``session_id`` is not a valid UUID string.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status_tag = await conn.execute(
            """
            DELETE FROM vocab_worksheets WHERE session_id = $1
            """,
            uuid.UUID(session_id),
        )

    if not status_tag:
        return 0
    return int(status_tag.split()[-1])
|
|
|
|
|
|
# =============================================================================
|
|
# PDF CACHE OPERATIONS
|
|
# =============================================================================
|
|
|
|
# Simple in-memory cache for PDF data (temporary until served)
|
|
# Maps worksheet_id -> raw PDF bytes. Within this module, entries are only
# removed by clear_cached_pdf_data(); there is no size limit or expiry here.
_pdf_cache: Dict[str, bytes] = {}
|
|
|
|
|
|
def cache_pdf_data(worksheet_id: str, pdf_data: bytes) -> None:
    """Store PDF bytes in the in-memory cache under ``worksheet_id``.

    An existing entry for the same id is overwritten.
    """
    _pdf_cache.update({worksheet_id: pdf_data})
|
|
|
|
|
|
def get_cached_pdf_data(worksheet_id: str) -> Optional[bytes]:
    """Look up cached PDF bytes; return ``None`` when nothing is cached."""
    try:
        return _pdf_cache[worksheet_id]
    except KeyError:
        return None
|
|
|
|
|
|
def clear_cached_pdf_data(worksheet_id: str) -> None:
    """Drop the cached PDF for ``worksheet_id``; a missing entry is a no-op."""
    if worksheet_id in _pdf_cache:
        del _pdf_cache[worksheet_id]
|
|
|
|
|
|
# =============================================================================
|
|
# HELPER FUNCTIONS
|
|
# =============================================================================
|
|
|
|
def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
    """Turn a database record into a plain dict with serialisable values.

    Conversions applied to columns that are present and not ``None``:

    * ``id`` and ``session_id`` are converted with ``str()``.
    * ``created_at``, ``updated_at`` and ``generated_at`` are replaced by
      their ``isoformat()`` string.
    * ``ocr_prompts``, ``processed_pages``, ``successful_pages``,
      ``failed_pages`` and ``worksheet_types`` are decoded with
      ``json.loads`` when the stored value is a string.

    Returns:
        The converted dict, or an empty dict when ``row`` is ``None``.
    """
    if row is None:
        return {}

    data = dict(row)

    # Identifier columns -> strings.
    for column in ('id', 'session_id'):
        if data.get(column) is not None:
            data[column] = str(data[column])

    # Timestamp columns -> ISO 8601 strings.
    for column in ('created_at', 'updated_at', 'generated_at'):
        if data.get(column) is not None:
            data[column] = data[column].isoformat()

    # JSON columns that arrive as text -> Python objects. Values that are
    # already decoded (or None) are left untouched.
    for column in ('ocr_prompts', 'processed_pages', 'successful_pages',
                   'failed_pages', 'worksheet_types'):
        raw = data.get(column)
        if isinstance(raw, str):
            data[column] = json.loads(raw)

    return data
|