Restructure: Move 52 files into 7 domain packages
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
Packages: korrektur/, zeugnis/, admin/, compliance/, worksheet/, training/, metrics/. 52 shims, relative imports, RAG untouched.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,105 +1,4 @@
|
||||
"""
|
||||
Zeugnis Crawler - Start/stop/status control functions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from zeugnis_worker import ZeugnisCrawler, get_crawler_state
|
||||
|
||||
|
||||
# Crawler object created by the most recent start_crawler() call; None until
# the first start.
_crawler_instance: Optional[ZeugnisCrawler] = None
# asyncio task running the background crawl loop; set by start_crawler() and
# cancelled by stop_crawler(). None until the first start.
_crawler_task: Optional[asyncio.Task] = None
|
||||
|
||||
|
||||
async def start_crawler(bundesland: Optional[str] = None, source_id: Optional[str] = None) -> bool:
    """Start a background crawl of Zeugnis sources.

    Resets the per-day counters on the shared crawler state, creates and
    initialises a ``ZeugnisCrawler``, and schedules the crawl loop as an
    asyncio task. The call returns as soon as the task is scheduled, not
    when the crawl has finished.

    Args:
        bundesland: When given (and ``source_id`` is not), only sources whose
            ``bundesland`` column equals this value are crawled.
        source_id: When given, only the source with this id is crawled. Takes
            precedence over ``bundesland``.

    Returns:
        ``True`` if a crawl task was scheduled, ``False`` if the shared state
        already reports a running crawler.

    Raises:
        Exception: Anything raised while constructing or initialising the
            crawler. The running flag is cleared before it propagates.
    """
    global _crawler_instance, _crawler_task

    state = get_crawler_state()

    if state.is_running:
        return False

    state.is_running = True
    state.documents_crawled_today = 0
    state.documents_indexed_today = 0
    state.errors_today = 0

    try:
        _crawler_instance = ZeugnisCrawler()
        await _crawler_instance.init()
    except BaseException:
        # Without this reset a failed (or cancelled) init would leave
        # is_running stuck at True and every later start would be refused.
        state.is_running = False
        raise

    async def run_crawler():
        """Crawl the selected sources one by one until done or stopped."""
        try:
            from metrics_db import get_pool
            pool = await get_pool()

            if pool:
                async with pool.acquire() as conn:
                    # Get sources to crawl
                    if source_id:
                        sources = await conn.fetch(
                            "SELECT id, bundesland FROM zeugnis_sources WHERE id = $1",
                            source_id
                        )
                    elif bundesland:
                        sources = await conn.fetch(
                            "SELECT id, bundesland FROM zeugnis_sources WHERE bundesland = $1",
                            bundesland
                        )
                    else:
                        sources = await conn.fetch(
                            "SELECT id, bundesland FROM zeugnis_sources ORDER BY bundesland"
                        )

                    for source in sources:
                        # stop_crawler() clears the flag; honour it between sources.
                        if not state.is_running:
                            break
                        await _crawler_instance.crawl_source(source["id"])

        except Exception as e:
            print(f"Crawler error: {e}")

        finally:
            # Runs on normal completion, on error and on cancellation.
            state.is_running = False
            if _crawler_instance:
                await _crawler_instance.close()

    _crawler_task = asyncio.create_task(run_crawler())
    return True
|
||||
|
||||
|
||||
async def stop_crawler() -> bool:
    """Request the running crawl to stop and wait for its task to end.

    Clears the running flag on the shared crawler state, cancels the
    background task if one was scheduled, and awaits it so that the task's
    cleanup has run before this coroutine returns.

    Returns:
        ``True`` if a running crawler was stopped, ``False`` if the shared
        state reported that no crawler was running.
    """
    global _crawler_task

    crawler_state = get_crawler_state()

    if crawler_state.is_running:
        crawler_state.is_running = False

        running_task = _crawler_task
        if running_task:
            running_task.cancel()
            try:
                await running_task
            except asyncio.CancelledError:
                # Expected outcome of the cancel() just above.
                pass

        return True

    return False
|
||||
|
||||
|
||||
def get_crawler_status() -> Dict[str, Any]:
    """Return a snapshot of the shared crawler state as a plain dict.

    Returns:
        A dict with the running flag, the current source id and bundesland,
        the queue length, the three per-day counters, and ``last_activity``
        as an ISO-8601 string (``None`` when no activity is recorded).
    """
    crawler_state = get_crawler_state()

    snapshot: Dict[str, Any] = {
        "is_running": crawler_state.is_running,
        "current_source": crawler_state.current_source_id,
        "current_bundesland": crawler_state.current_bundesland,
        "queue_length": len(crawler_state.queue),
        "documents_crawled_today": crawler_state.documents_crawled_today,
        "documents_indexed_today": crawler_state.documents_indexed_today,
        "errors_today": crawler_state.errors_today,
    }

    # Added last so the key order matches the status payload layout.
    last_seen = crawler_state.last_activity
    if last_seen:
        snapshot["last_activity"] = last_seen.isoformat()
    else:
        snapshot["last_activity"] = None

    return snapshot
|
||||
# Backward-compat shim -- module moved to zeugnis/control.py
# The sys.modules entry registered under this module's own name is replaced
# with the imported zeugnis.control module, so code that imports the legacy
# module name receives the relocated module object.
import importlib as _importlib
import sys as _sys
# Must stay the last statement: after this line the name no longer refers to
# this shim module.
_sys.modules[__name__] = _importlib.import_module("zeugnis.control")
|
||||
|
||||
Reference in New Issue
Block a user