backend-lehrer (10 files): - game/database.py (785 → 5), correction_api.py (683 → 4) - classroom_engine/antizipation.py (676 → 5) - llm_gateway schools/edu_search already done in prior batch klausur-service (12 files): - orientation_crop_api.py (694 → 5), pdf_export.py (677 → 4) - zeugnis_crawler.py (676 → 5), grid_editor_api.py (671 → 5) - eh_templates.py (658 → 5), mail/api.py (651 → 5) - qdrant_service.py (638 → 5), training_api.py (625 → 4) website (6 pages): - middleware (696 → 8), mail (733 → 6), consent (628 → 8) - compliance/risks (622 → 5), export (502 → 5), brandbook (629 → 7) studio-v2 (3 components): - B2BMigrationWizard (848 → 3), CleanupPanel (765 → 2) - dashboard-experimental (739 → 2) admin-lehrer (4 files): - uebersetzungen (769 → 4), manager (670 → 2) - ChunkBrowserQA (675 → 6), dsfa/page (674 → 5) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
106 lines
3.0 KiB
Python
106 lines
3.0 KiB
Python
"""
|
|
Zeugnis Crawler - Start/stop/status control functions.
|
|
"""
|
|
|
|
import asyncio
|
|
from typing import Optional, Dict, Any
|
|
|
|
from zeugnis_worker import ZeugnisCrawler, get_crawler_state
|
|
|
|
|
|
_crawler_instance: Optional[ZeugnisCrawler] = None
|
|
_crawler_task: Optional[asyncio.Task] = None
|
|
|
|
|
|
async def start_crawler(bundesland: Optional[str] = None, source_id: Optional[str] = None) -> bool:
    """Start the crawler as a background asyncio task.

    Args:
        bundesland: If given, crawl only sources for this federal state.
        source_id: If given, crawl only this single source. Takes
            precedence over ``bundesland``.

    Returns:
        True if the crawler was started, False if one is already running.

    Raises:
        Exception: Propagates any error raised by ``ZeugnisCrawler.init()``;
            in that case the running flag is rolled back so a later start
            attempt is still possible.
    """
    global _crawler_instance, _crawler_task

    state = get_crawler_state()

    # Only one crawler may run at a time.
    if state.is_running:
        return False

    # Mark running before any awaits so concurrent start calls bail out,
    # and reset the daily counters for this run.
    state.is_running = True
    state.documents_crawled_today = 0
    state.documents_indexed_today = 0
    state.errors_today = 0

    _crawler_instance = ZeugnisCrawler()
    try:
        await _crawler_instance.init()
    except Exception:
        # BUGFIX: a failing init() previously left is_running stuck at True,
        # permanently blocking every later start_crawler() call. Roll the
        # flag back and re-raise so the caller sees the original error.
        state.is_running = False
        _crawler_instance = None
        raise

    async def run_crawler():
        # Background task: fetch the source list and crawl each source,
        # honoring the cooperative is_running stop flag between sources.
        try:
            from metrics_db import get_pool
            pool = await get_pool()

            if pool:
                async with pool.acquire() as conn:
                    # Resolve which sources to crawl, most specific filter first.
                    if source_id:
                        sources = await conn.fetch(
                            "SELECT id, bundesland FROM zeugnis_sources WHERE id = $1",
                            source_id
                        )
                    elif bundesland:
                        sources = await conn.fetch(
                            "SELECT id, bundesland FROM zeugnis_sources WHERE bundesland = $1",
                            bundesland
                        )
                    else:
                        sources = await conn.fetch(
                            "SELECT id, bundesland FROM zeugnis_sources ORDER BY bundesland"
                        )

                    for source in sources:
                        # stop_crawler() flips this flag to request shutdown.
                        if not state.is_running:
                            break
                        await _crawler_instance.crawl_source(source["id"])

        except Exception as e:
            print(f"Crawler error: {e}")

        finally:
            # Always clear the flag and release crawler resources, even on
            # error or cancellation.
            state.is_running = False
            if _crawler_instance:
                await _crawler_instance.close()

    _crawler_task = asyncio.create_task(run_crawler())
    return True
|
|
|
|
|
|
async def stop_crawler() -> bool:
    """Request a running crawler to stop and wait for its task to end.

    Returns:
        True if a running crawler was stopped, False if none was active.
    """
    global _crawler_task

    state = get_crawler_state()

    # Nothing to do when no crawl is in progress.
    if not state.is_running:
        return False

    # Flip the cooperative flag first; the crawl loop checks it between
    # sources and will exit on its own if cancellation races.
    state.is_running = False

    task = _crawler_task
    if task is not None:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected result of the cancel() above — swallow it.
            pass

    return True
|
|
|
|
|
|
def get_crawler_status() -> Dict[str, Any]:
    """Return a JSON-serializable snapshot of the current crawler state."""
    state = get_crawler_state()

    # datetime is not JSON-serializable; emit ISO-8601 text or None.
    last_activity = (
        state.last_activity.isoformat() if state.last_activity else None
    )

    status = {
        "is_running": state.is_running,
        "current_source": state.current_source_id,
        "current_bundesland": state.current_bundesland,
        "queue_length": len(state.queue),
        "documents_crawled_today": state.documents_crawled_today,
        "documents_indexed_today": state.documents_indexed_today,
        "errors_today": state.errors_today,
        "last_activity": last_activity,
    }
    return status
|