Files
breakpilot-lehrer/klausur-service/backend/zeugnis/control.py
Benjamin Admin 165c493d1e
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
Restructure: Move 52 files into 7 domain packages
korrektur/ zeugnis/ admin/ compliance/ worksheet/ training/ metrics/
52 shims, relative imports, RAG untouched.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 22:10:48 +02:00

106 lines
3.0 KiB
Python

"""
Zeugnis Crawler - Start/stop/status control functions.
"""
import asyncio
from typing import Optional, Dict, Any
from .worker import ZeugnisCrawler, get_crawler_state
# Crawler object created by the most recent start_crawler() call; None until the first start.
_crawler_instance: Optional[ZeugnisCrawler] = None
# asyncio task running the background crawl loop; created by start_crawler(), cancelled by stop_crawler().
_crawler_task: Optional[asyncio.Task] = None
async def start_crawler(bundesland: Optional[str] = None, source_id: Optional[str] = None) -> bool:
    """Start a background crawl over the configured Zeugnis sources.

    The per-day counters on the shared crawler state are reset, a fresh
    ``ZeugnisCrawler`` is initialised, and the crawl loop is scheduled as an
    asyncio task stored in ``_crawler_task``.

    Args:
        bundesland: If given (and ``source_id`` is not), only sources whose
            ``bundesland`` column equals this value are crawled.
        source_id: If given, only the source with this id is crawled. Takes
            precedence over ``bundesland``.

    Returns:
        ``False`` if the state already reports a running crawler, otherwise
        ``True`` once the background task has been scheduled.

    Raises:
        Any exception raised while constructing or initialising the crawler.
        The running flag is rolled back first so a later start is possible.
    """
    global _crawler_instance, _crawler_task

    state = get_crawler_state()
    if state.is_running:
        return False

    state.is_running = True
    state.documents_crawled_today = 0
    state.documents_indexed_today = 0
    state.errors_today = 0

    try:
        crawler = ZeugnisCrawler()
        _crawler_instance = crawler
        await crawler.init()
    except BaseException:
        # Without this rollback a failed init would leave is_running stuck at
        # True and every subsequent start_crawler() call would return False.
        state.is_running = False
        raise

    async def run_crawler():
        # The task works on its own `crawler` reference rather than the module
        # global, so it always crawls with and closes the instance it was
        # started with, even if the global is rebound later.
        try:
            from metrics_db import get_pool

            pool = await get_pool()
            if not pool:
                return

            # The connection is only needed to read the source list; it is not
            # passed to crawl_source(), so release it before the crawl starts
            # instead of holding it for the whole run.
            async with pool.acquire() as conn:
                # Get sources to crawl
                if source_id:
                    sources = await conn.fetch(
                        "SELECT id, bundesland FROM zeugnis_sources WHERE id = $1",
                        source_id
                    )
                elif bundesland:
                    sources = await conn.fetch(
                        "SELECT id, bundesland FROM zeugnis_sources WHERE bundesland = $1",
                        bundesland
                    )
                else:
                    sources = await conn.fetch(
                        "SELECT id, bundesland FROM zeugnis_sources ORDER BY bundesland"
                    )

            for source in sources:
                # stop_crawler() clears the flag to request a stop between sources.
                if not state.is_running:
                    break
                await crawler.crawl_source(source["id"])
        except Exception as e:
            print(f"Crawler error: {e}")
        finally:
            state.is_running = False
            await crawler.close()

    _crawler_task = asyncio.create_task(run_crawler())
    return True
async def stop_crawler() -> bool:
    """Request the running crawl to stop and wait for its task to finish.

    Returns:
        ``False`` if the shared state does not report a running crawler,
        otherwise ``True`` after the running flag has been cleared and the
        background task (if any) has been cancelled and awaited.
    """
    crawler_state = get_crawler_state()

    if crawler_state.is_running:
        # Clearing the flag lets the crawl loop exit between sources.
        crawler_state.is_running = False

        pending = _crawler_task
        if pending:
            pending.cancel()
            try:
                await pending
            except asyncio.CancelledError:
                # Expected outcome of cancelling the task above.
                pass
        return True

    return False
def get_crawler_status() -> Dict[str, Any]:
    """Return a dictionary snapshot of the shared crawler state.

    The result contains the running flag, the current source id and
    bundesland, the queue length, the three per-day counters, and the last
    activity timestamp as an ISO-8601 string (``None`` when unset).
    """
    snapshot = get_crawler_state()

    status: Dict[str, Any] = {
        "is_running": snapshot.is_running,
        "current_source": snapshot.current_source_id,
        "current_bundesland": snapshot.current_bundesland,
        "queue_length": len(snapshot.queue),
        "documents_crawled_today": snapshot.documents_crawled_today,
        "documents_indexed_today": snapshot.documents_indexed_today,
        "errors_today": snapshot.errors_today,
    }

    if snapshot.last_activity:
        status["last_activity"] = snapshot.last_activity.isoformat()
    else:
        status["last_activity"] = None

    return status