backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
66 lines
1.8 KiB
Python
66 lines
1.8 KiB
Python
#!/usr/bin/env python3
"""
Full Compliance Pipeline for Legal Corpus — Barrel Re-export.

Split into submodules:
- compliance_models.py — Dataclasses (Checkpoint, Control, Measure)
- compliance_extraction.py — Pattern extraction & control/measure generation
- compliance_pipeline.py — Pipeline phases & orchestrator

Run on Mac Mini:
    nohup python full_compliance_pipeline.py > /tmp/compliance_pipeline.log 2>&1 &
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import sys
|
|
|
|
# --- Logging setup -------------------------------------------------------
# Mirror all pipeline output to stdout AND to the same file that the nohup
# invocation (see module docstring) redirects into.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_LOG_FILE = '/tmp/compliance_pipeline.log'

logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(_LOG_FILE),
    ],
)
|
|
|
|
# Re-export all public symbols.
# This module is a thin barrel: the implementation moved into the three
# submodules listed in the docstring, and these imports keep existing
# `from full_compliance_pipeline import X` call sites working unchanged.
from compliance_models import Checkpoint, Control, Measure
from compliance_extraction import (
    extract_checkpoints_from_chunk,
    generate_control_for_checkpoints,
    generate_measure_for_control,
)
from compliance_pipeline import CompliancePipeline

# Explicit public API for `import *` consumers — must stay in sync with the
# re-imports above.
__all__ = [
    "Checkpoint",
    "Control",
    "Measure",
    "extract_checkpoints_from_chunk",
    "generate_control_for_checkpoints",
    "generate_measure_for_control",
    "CompliancePipeline",
]
|
|
|
|
|
|
async def main():
    """Parse CLI flags and run the full compliance pipeline end to end.

    Flags:
        --force-reindex  — force re-ingestion of all documents
        --skip-ingestion — reuse existing chunks, skipping the ingestion phase
    """
    import argparse  # local import: only needed when run as a script

    cli = argparse.ArgumentParser(description="Run the compliance pipeline")
    # Both flags are plain store_true switches; register them data-driven.
    for flag, blurb in (
        ("--force-reindex", "Force re-ingestion of all documents"),
        ("--skip-ingestion", "Skip ingestion phase, use existing chunks"),
    ):
        cli.add_argument(flag, action="store_true", help=blurb)
    opts = cli.parse_args()

    runner = CompliancePipeline()
    await runner.run_full_pipeline(
        force_reindex=opts.force_reindex,
        skip_ingestion=opts.skip_ingestion,
    )
|
|
|
|
|
|
# Script entry point: drive the async pipeline on a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())
|