#!/usr/bin/env python3
"""
Full Compliance Pipeline for Legal Corpus — Barrel Re-export.

Split into submodules:
- compliance_models.py      — Dataclasses (Checkpoint, Control, Measure)
- compliance_extraction.py  — Pattern extraction & control/measure generation
- compliance_pipeline.py    — Pipeline phases & orchestrator

Run on Mac Mini:
    nohup python full_compliance_pipeline.py > /tmp/compliance_pipeline.log 2>&1 &
"""
import argparse
import asyncio
import logging
import sys

# Configure logging BEFORE importing the submodules below, so any logging they
# perform at import time already uses this configuration.
# NOTE(review): when launched via the nohup command shown in the docstring,
# stdout is redirected into the same file the FileHandler writes, so each
# record appears twice in /tmp/compliance_pipeline.log — confirm whether that
# duplication is intended.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('/tmp/compliance_pipeline.log')
    ]
)

# Re-export all public symbols (barrel pattern) so existing callers can keep
# importing everything from this module after the split.
from compliance_models import Checkpoint, Control, Measure
from compliance_extraction import (
    extract_checkpoints_from_chunk,
    generate_control_for_checkpoints,
    generate_measure_for_control,
)
from compliance_pipeline import CompliancePipeline

__all__ = [
    "Checkpoint",
    "Control",
    "Measure",
    "extract_checkpoints_from_chunk",
    "generate_control_for_checkpoints",
    "generate_measure_for_control",
    "CompliancePipeline",
]


async def main() -> None:
    """Parse CLI flags and run the full compliance pipeline.

    Flags:
        --force-reindex   Force re-ingestion of all documents.
        --skip-ingestion  Skip the ingestion phase and use existing chunks.
    """
    parser = argparse.ArgumentParser(description="Run the compliance pipeline")
    parser.add_argument("--force-reindex", action="store_true",
                        help="Force re-ingestion of all documents")
    parser.add_argument("--skip-ingestion", action="store_true",
                        help="Skip ingestion phase, use existing chunks")
    args = parser.parse_args()

    pipeline = CompliancePipeline()
    await pipeline.run_full_pipeline(
        force_reindex=args.force_reindex,
        skip_ingestion=args.skip_ingestion,
    )


if __name__ == "__main__":
    asyncio.run(main())