diff --git a/.claude/rules/loc-exceptions.txt b/.claude/rules/loc-exceptions.txt index d557b94..7afdc5c 100644 --- a/.claude/rules/loc-exceptions.txt +++ b/.claude/rules/loc-exceptions.txt @@ -11,9 +11,13 @@ **/venv/** | owner=infra | reason=Python virtualenv | review=permanent # Test-Dateien (duerfen groesser sein fuer Table-Driven Tests) -**/tests/test_cv_vocab_pipeline.py | owner=klausur | reason=umfangreiche OCR Pipeline Tests | review=2026-07-01 -**/tests/test_rbac.py | owner=klausur | reason=RBAC Test-Matrix | review=2026-07-01 -**/tests/test_grid_editor_api.py | owner=klausur | reason=Grid Editor Integrationstests | review=2026-07-01 +**/*test*.py | owner=all | reason=Tests mit Table-Driven Patterns duerfen groesser sein | review=permanent +**/*test*.go | owner=all | reason=Go Tests mit Table-Driven Patterns | review=permanent +**/*test*.ts | owner=all | reason=TypeScript Tests | review=permanent +**/tests/** | owner=all | reason=Test-Verzeichnisse | review=permanent + +# Blog-Seiten (reine statische Inhalte, kein Code) +**/blog/*/page.tsx | owner=website | reason=Statische Blog-Artikel (MDX-artig, reiner Content) | review=permanent # Pure Data Registries (keine Logik, nur Daten-Definitionen) **/dsfa_sources_registry.py | owner=klausur | reason=Pure data registry (license + source definitions, no logic) | review=2027-01-01 diff --git a/scripts/check-loc.sh b/scripts/check-loc.sh index d635594..17717c9 100755 --- a/scripts/check-loc.sh +++ b/scripts/check-loc.sh @@ -1,5 +1,17 @@ #!/usr/bin/env bash -set -euo pipefail +# +# LOC Budget Checker — enforces the 500 LOC hard cap. 
#
# Usage:
#   bash scripts/check-loc.sh --changed   # staged + unstaged + untracked files (default)
#   bash scripts/check-loc.sh --all       # all source files in repo
#   bash scripts/check-loc.sh --staged    # only staged files (for pre-commit hook)
#
# Exit codes:
#   0 — all files within budget
#   1 — one or more violations found

# NOTE: -e is not enabled, so commands whose failure matters must be checked
# explicitly at the call site.
set -uo pipefail

MAX_LOC="${MAX_LOC:-500}"
ROOT_DIR="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
EXCEPTIONS_FILE="${ROOT_DIR}/.claude/rules/loc-exceptions.txt"

red()   { printf '\033[31m%s\033[0m\n' "$*"; }
green() { printf '\033[32m%s\033[0m\n' "$*"; }
yellow(){ printf '\033[33m%s\033[0m\n' "$*"; }

#######################################
# Check whether a file matches any exception pattern in loc-exceptions.txt.
# Each non-comment line has the form "<glob> | key=value | ..."; only the
# glob (everything before the first '|') is used.
# Globals:   EXCEPTIONS_FILE (read)
# Arguments: $1 - file path (repo-relative or absolute)
# Returns:   0 if the file is exempt, 1 otherwise (or if the list is missing)
#######################################
is_exempt() {
  local file="$1"
  local line pattern
  [[ -f "$EXCEPTIONS_FILE" ]] || return 1

  # "|| [[ -n ... ]]" keeps the last entry when the file lacks a final newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Skip empty lines and comments
    [[ -z "$line" ]] && continue
    [[ "$line" =~ ^[[:space:]]*# ]] && continue

    # Extract the glob and trim surrounding whitespace with parameter
    # expansion (no fork per line; quotes/backslashes in patterns stay intact).
    pattern="${line%%|*}"
    pattern="${pattern#"${pattern%%[![:space:]]*}"}"
    pattern="${pattern%"${pattern##*[![:space:]]}"}"
    [[ -z "$pattern" ]] && continue

    # In a case pattern '*' also matches '/', so "**/x" works as a recursive
    # glob — but it still needs a literal '/' before x. Matching "/$file" as
    # well lets "**/tests/**" cover repo-root paths such as "tests/a.py".
    # shellcheck disable=SC2254
    case "$file" in
      $pattern) return 0 ;;
    esac
    # shellcheck disable=SC2254
    case "/$file" in
      $pattern) return 0 ;;
    esac
  done < "$EXCEPTIONS_FILE"
  return 1
}

# Return 0 if the file has one of the source extensions covered by the budget.
is_source_file() {
  local file="$1"
  case "$file" in
    *.py|*.go|*.ts|*.tsx) return 0 ;;
    *) return 1 ;;
  esac
}

# Return 0 if the path lies inside a vendored/generated directory.
# The leading '/' makes root-level directories (e.g. "node_modules/x.ts")
# match the "*/dir/*" patterns too.
should_skip_path() {
  local file="$1"
  case "/$file" in
    */node_modules/*|*/.next/*|*/__pycache__/*|*/venv/*|*/.git/*) return 0 ;;
    *) return 1 ;;
  esac
}
#######################################
# Entry point: collect candidate files for the selected mode and report every
# non-exempt source file whose line count exceeds MAX_LOC.
# Globals:   ROOT_DIR, MAX_LOC (read)
# Arguments: $1 - optional mode flag: --changed (default), --staged, --all
# Outputs:   one "VIOLATION: ..." line per offending file plus a summary
# Exits:     0 when all files are within budget; 1 on violations or when the
#            working directory / temp file cannot be set up
#######################################
main() {
  local mode="changed"
  case "${1:-}" in
    --changed | "") mode="changed" ;;
    --staged) mode="staged" ;;
    --all) mode="all" ;;
    *)
      # Keep the historical fallback, but make a mistyped flag visible.
      printf 'check-loc: unknown option %s, falling back to --changed\n' "$1" >&2
      ;;
  esac

  # -e is not set, so a failed cd would otherwise scan the wrong directory.
  cd "$ROOT_DIR" || { red "check-loc: cannot cd to $ROOT_DIR" >&2; exit 1; }

  local tmpfile
  tmpfile=$(mktemp) || { red "check-loc: mktemp failed" >&2; exit 1; }
  # Expand the path now (tmpfile is local to main) and shell-quote it with %q
  # so the trap stays valid even if the path contains spaces or quotes.
  # shellcheck disable=SC2064
  trap "rm -f -- $(printf '%q' "$tmpfile")" EXIT

  local violations=0
  local checked=0
  local exempted=0
  local file loc

  if [[ "$mode" == "all" ]]; then
    # One batched wc over the whole tree; awk keeps only files above the
    # budget and emits "<count><TAB><path>". Requiring the "./" prefix that
    # find always produces drops wc's "total" lines precisely.
    find . \( -name '*.py' -o -name '*.go' -o -name '*.ts' -o -name '*.tsx' \) \
      -not -path '*/node_modules/*' \
      -not -path '*/.next/*' \
      -not -path '*/__pycache__/*' \
      -not -path '*/venv/*' \
      -not -path '*/.git/*' \
      -exec wc -l {} + 2>/dev/null \
      | awk -v max="$MAX_LOC" '
          {
            count = $1 + 0
            if (count > max && sub(/^[[:space:]]*[0-9]+[[:space:]]+\.\//, "")) {
              printf "%d\t%s\n", count, $0
            }
          }' > "$tmpfile"

    # Every listed file already exceeds the budget; only exemptions remain
    # to be checked, and the count from wc is reused instead of re-reading.
    while IFS=$'\t' read -r loc file; do
      [[ -z "$file" ]] && continue
      [[ -f "$file" ]] || continue
      if is_exempt "$file"; then
        exempted=$((exempted + 1))
        continue
      fi
      red "VIOLATION: $file ($loc LOC > $MAX_LOC)"
      violations=$((violations + 1))
    done < "$tmpfile"

    echo ""
    if (( violations > 0 )); then
      red "$violations file(s) exceed ${MAX_LOC} LOC budget. (violations: $violations, exempted: $exempted)"
      exit 1
    fi
    green "LOC budget check passed. (exempted: $exempted)"
    exit 0
  fi

  # staged / changed: gather candidate paths (relative to the repo root).
  if [[ "$mode" == "staged" ]]; then
    git diff --name-only --cached > "$tmpfile"
  else
    {
      git diff --name-only --cached 2>/dev/null
      git diff --name-only 2>/dev/null
      git ls-files --others --exclude-standard 2>/dev/null
    } | sort -u > "$tmpfile"
  fi

  while IFS= read -r file; do
    [[ -z "$file" ]] && continue
    [[ -f "$file" ]] || continue   # deleted files still show up in git diff
    is_source_file "$file" || continue
    should_skip_path "$file" && continue

    if is_exempt "$file"; then
      exempted=$((exempted + 1))
      continue
    fi

    checked=$((checked + 1))
    loc=$(wc -l < "$file" 2>/dev/null || echo 0)
    loc=$((loc))   # BSD wc pads its output with spaces; normalise to a bare integer

    if (( loc > MAX_LOC )); then
      red "VIOLATION: $file ($loc LOC > $MAX_LOC)"
      violations=$((violations + 1))
    fi
  done < "$tmpfile"

  echo ""
  if (( violations > 0 )); then
    red "$violations file(s) exceed ${MAX_LOC} LOC budget. (checked: $checked, exempted: $exempted)"
    exit 1
  fi
  green "LOC budget check passed. (checked: $checked, exempted: $exempted)"
  exit 0
}

main "$@"