breakpilot-lehrer/backend-lehrer/unit_analytics_helpers.py

"""
Unit Analytics API - Helpers.

Database access, statistical computation, and utility functions.
"""

import os
import logging
from typing import List, Dict, Optional

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Feature flag: database access is enabled unless the GAME_USE_DATABASE
# environment variable is set to something other than "true"
# (compared case-insensitively; defaults to "true" when unset).
USE_DATABASE = os.environ.get("GAME_USE_DATABASE", "true").lower() == "true"

# Cached database handle, filled in lazily by get_analytics_database().
_analytics_db = None


async def get_analytics_database():
    """Return the shared analytics database handle, creating it on first use.

    Returns ``None`` when the ``USE_DATABASE`` flag is off, or when the
    database could not be set up. Setup failures are logged as warnings
    rather than raised; because nothing is cached in that case, the next
    call attempts the setup again.
    """
    global _analytics_db

    if not USE_DATABASE:
        return None

    # Fast path: a handle was already created by an earlier call.
    if _analytics_db is not None:
        return _analytics_db

    try:
        # Imported lazily so this module still loads when the database
        # package is not importable.
        from unit.database import get_analytics_db
        _analytics_db = await get_analytics_db()
        logger.info("Analytics database initialized")
    except ImportError:
        logger.warning("Analytics database module not available")
    except Exception as e:
        # Best-effort: any other setup failure degrades to "no database".
        logger.warning(f"Analytics database not available: {e}")

    return _analytics_db


def calculate_gain_distribution(gains: List[float]) -> Dict[str, int]:
    """Count learning gains per percentage bucket.

    Each gain is a fraction (0.15 means 15%) and is scaled to percent
    before bucketing. Buckets are lower-inclusive and upper-exclusive, so
    a gain of exactly 0% is counted in "0% to 10%" and anything at or
    above 20% is counted in "> 20%".

    Args:
        gains: Learning gains expressed as fractions.

    Returns:
        Mapping from bucket label to the number of gains in that bucket.
        Every bucket is present, even when its count is zero.
    """
    # (exclusive upper bound in percent, bucket label), in ascending order.
    bounded_buckets = (
        (-20, "< -20%"),
        (-10, "-20% to -10%"),
        (0, "-10% to 0%"),
        (10, "0% to 10%"),
        (20, "10% to 20%"),
    )
    overflow_label = "> 20%"

    counts = {label: 0 for _, label in bounded_buckets}
    counts[overflow_label] = 0

    for gain in gains:
        percent = gain * 100
        # First bucket whose upper bound exceeds the value; otherwise overflow.
        label = next(
            (name for upper, name in bounded_buckets if percent < upper),
            overflow_label,
        )
        counts[label] += 1

    return counts


def calculate_trend(scores: List[float]) -> str:
    """Classify the direction of a score series.

    Fits a least-squares line through the scores against their 0-based
    position and classifies the slope.

    Args:
        scores: Scores in chronological order.

    Returns:
        "insufficient_data" when fewer than three scores are given,
        "improving" when the slope exceeds 0.05, "declining" when it is
        below -0.05, and "stable" otherwise.
    """
    count = len(scores)
    if count < 3:
        return "insufficient_data"

    mean_index = (count - 1) / 2
    mean_score = sum(scores) / count

    # Distance of each position from the mean position.
    offsets = [position - mean_index for position in range(count)]

    covariance = sum(
        offset * (score - mean_score) for offset, score in zip(offsets, scores)
    )
    spread = sum(offset ** 2 for offset in offsets)

    # Defensive guard against division by zero.
    if spread == 0:
        return "stable"

    slope = covariance / spread

    if slope > 0.05:
        return "improving"
    if slope < -0.05:
        return "declining"
    return "stable"


def calculate_difficulty_rating(success_rate: float, avg_attempts: float) -> float:
    """Calculate a difficulty rating in the range 1-5 from success metrics.

    A lower success rate and a higher average number of attempts both
    raise the rating.

    Args:
        success_rate: Fraction of successful outcomes, expected in [0, 1].
        avg_attempts: Average number of attempts. Values below 1 (for
            example 0 when no attempts were recorded) add no difficulty.

    Returns:
        The difficulty rating, always clamped to the range [1.0, 5.0].
    """
    # A success rate of 1.0 maps to 1 and 0.0 maps to 4.
    base_difficulty = (1 - success_rate) * 3 + 1
    # Each attempt beyond the first adds difficulty, capped at +1. The lower
    # clamp stops avg_attempts < 1 from lowering the rating.
    attempt_modifier = max(0.0, min(avg_attempts - 1, 1))
    # Clamp both ends so out-of-range inputs still give a valid 1-5 rating.
    return max(1.0, min(5.0, base_difficulty + attempt_modifier))