Files
breakpilot-compliance/document-crawler/db.py
Benjamin Boenisch 364d2c69ff feat: Add Document Crawler & Auto-Onboarding service (Phase 1.4)
New standalone Python/FastAPI service for automatic compliance document
scanning, LLM-based classification, IPFS archival, and gap analysis.
Includes extractors (PDF, DOCX, XLSX, PPTX), keyword fallback classifier,
compliance matrix, and full REST API on port 8098.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 20:35:15 +01:00

48 lines
1.1 KiB
Python

"""asyncpg pool management for Document Crawler."""
import asyncpg
from config import settings
_pool: asyncpg.Pool | None = None
import asyncio

# Serializes first-time pool creation: without it, two coroutines that
# both observe `_pool is None` would each await create_pool() and one
# pool would be silently leaked.
_pool_lock = asyncio.Lock()


async def get_pool() -> asyncpg.Pool:
    """Return the shared asyncpg pool, creating it on first use.

    Safe under concurrent callers: a lock plus a double-check ensures
    exactly one pool is ever created.

    Returns:
        The process-wide ``asyncpg.Pool`` (min_size=2, max_size=10).
    """
    global _pool
    if _pool is None:
        async with _pool_lock:
            # Re-check: another coroutine may have created the pool
            # while we were waiting on the lock.
            if _pool is None:
                _pool = await asyncpg.create_pool(
                    settings.DATABASE_URL,
                    min_size=2,
                    max_size=10,
                )
    return _pool
async def close_pool():
    """Close the shared pool and forget it, if one was ever created.

    No-op when the pool has not been initialized (or was already
    closed); after this call, get_pool() will build a fresh pool.
    """
    global _pool
    if _pool is None:
        return
    await _pool.close()
    _pool = None
async def run_migration():
    """Run the crawler migration on startup.

    Idempotent: returns early when the migration file is absent (schema
    presumably managed elsewhere) or when the 'crawler_sources' table
    already exists.
    """
    from pathlib import Path

    migration_path = (
        Path(__file__).parent / "migrations" / "014_crawler_tables.sql"
    )
    if not migration_path.exists():
        return
    pool = await get_pool()
    async with pool.acquire() as conn:
        # The 'crawler_sources' table doubles as the applied-migration
        # marker — no separate migrations bookkeeping table is used.
        already_applied = await conn.fetchval(
            "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'crawler_sources')"
        )
        if already_applied:
            return
        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's default locale encoding.
        sql = migration_path.read_text(encoding="utf-8")
        await conn.execute(sql)