backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
36 lines
788 B
Python
36 lines
788 B
Python
"""
GitHub Repository Crawler — Barrel Re-export

The original implementation was split into:
- github_crawler_parsers.py — ExtractedDocument, MarkdownParser, HTMLParser, JSONParser
- github_crawler_core.py — GitHubCrawler, RepositoryDownloader, crawl_source,
  plus the module-level configuration constants and the ``main`` entry point

All public names are re-exported here for backward compatibility, so existing
``from github_crawler import ...`` statements keep working unchanged.
"""

# Parsers
from github_crawler_parsers import (
    ExtractedDocument,
    MarkdownParser,
    HTMLParser,
    JSONParser,
)

# Crawler and downloader
from github_crawler_core import (
    GITHUB_API_URL,
    GITLAB_API_URL,
    GITHUB_TOKEN,
    MAX_FILE_SIZE,
    REQUEST_TIMEOUT,
    RATE_LIMIT_DELAY,
    GitHubCrawler,
    RepositoryDownloader,
    crawl_source,
    main,
)

# Explicit public API: exactly the names re-exported above. Listing them here
# marks the imports as intentionally "used" for linters and keeps
# ``from github_crawler import *`` exposing the same set of names as before.
__all__ = [
    # Parsers
    "ExtractedDocument",
    "MarkdownParser",
    "HTMLParser",
    "JSONParser",
    # Crawler configuration constants
    "GITHUB_API_URL",
    "GITLAB_API_URL",
    "GITHUB_TOKEN",
    "MAX_FILE_SIZE",
    "REQUEST_TIMEOUT",
    "RATE_LIMIT_DELAY",
    # Crawler and downloader
    "GitHubCrawler",
    "RepositoryDownloader",
    "crawl_source",
    "main",
]

if __name__ == "__main__":
    # Imported lazily: asyncio is only needed when the module is run as a script.
    import asyncio

    # NOTE(review): ``main`` is defined in github_crawler_core; it is passed to
    # asyncio.run(), so it is presumably a coroutine function — confirm there.
    asyncio.run(main())