[guardrail-change] Install pre-commit LOC budget hook
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 43s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 25s

- Rewrote scripts/check-loc.sh: fixed macOS compat, added --staged mode,
  optimized --all mode with find+wc pipeline
- Added .git/hooks/pre-commit that runs check-loc.sh --staged
- Extended loc-exceptions.txt with glob patterns for test files (*test*)
  and blog content pages (blog/*/page.tsx)

The hook blocks commits containing staged files >500 LOC unless exempted.
Bypass for emergencies: git commit --no-verify

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 10:28:33 +02:00
parent bd4b956e3c
commit 37db47fcd9
2 changed files with 126 additions and 31 deletions

View File

@@ -11,9 +11,13 @@
**/venv/** | owner=infra | reason=Python virtualenv | review=permanent
# Test-Dateien (duerfen groesser sein fuer Table-Driven Tests)
**/tests/test_cv_vocab_pipeline.py | owner=klausur | reason=umfangreiche OCR Pipeline Tests | review=2026-07-01
**/tests/test_rbac.py | owner=klausur | reason=RBAC Test-Matrix | review=2026-07-01
**/tests/test_grid_editor_api.py | owner=klausur | reason=Grid Editor Integrationstests | review=2026-07-01
**/*test*.py | owner=all | reason=Tests mit Table-Driven Patterns duerfen groesser sein | review=permanent
**/*test*.go | owner=all | reason=Go Tests mit Table-Driven Patterns | review=permanent
**/*test*.ts | owner=all | reason=TypeScript Tests | review=permanent
**/tests/** | owner=all | reason=Test-Verzeichnisse | review=permanent
# Blog-Seiten (reine statische Inhalte, kein Code)
**/blog/*/page.tsx | owner=website | reason=Statische Blog-Artikel (MDX-artig, reiner Content) | review=permanent
# Pure Data Registries (keine Logik, nur Daten-Definitionen)
**/dsfa_sources_registry.py | owner=klausur | reason=Pure data registry (license + source definitions, no logic) | review=2027-01-01

View File

@@ -1,5 +1,17 @@
#!/usr/bin/env bash
set -euo pipefail
#
# LOC Budget Checker — enforces the 500 LOC hard cap.
#
# Usage:
# bash scripts/check-loc.sh --changed # only git-changed files (pre-commit)
# bash scripts/check-loc.sh --all # all source files in repo
# bash scripts/check-loc.sh --staged # only staged files (for pre-commit hook)
#
# Exit codes:
# 0 — all files within budget
# 1 — one or more violations found
set -uo pipefail
MAX_LOC="${MAX_LOC:-500}"
ROOT_DIR="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
@@ -7,64 +19,143 @@ EXCEPTIONS_FILE="${ROOT_DIR}/.claude/rules/loc-exceptions.txt"
red() { printf '\033[31m%s\033[0m\n' "$*"; }
green() { printf '\033[32m%s\033[0m\n' "$*"; }
yellow(){ printf '\033[33m%s\033[0m\n' "$*"; }
# Check if a file matches any exception pattern in loc-exceptions.txt
is_exempt() {
local file="$1"
[[ -f "$EXCEPTIONS_FILE" ]] || return 1
while IFS= read -r line; do
# Skip empty lines and comments
[[ -z "$line" ]] && continue
[[ "$line" =~ ^# ]] && continue
[[ "$line" =~ ^[[:space:]]*# ]] && continue
# Extract glob pattern (everything before first |)
local pattern="${line%%|*}"
pattern="$(echo "$pattern" | xargs)"
if [[ -n "$pattern" ]] && [[ "$file" == $pattern ]]; then
return 0
fi
pattern="$(echo "$pattern" | xargs)" # trim whitespace
[[ -z "$pattern" ]] && continue
# Match against the file path (supports ** globs)
# shellcheck disable=SC2254
case "$file" in
$pattern) return 0 ;;
esac
done < "$EXCEPTIONS_FILE"
return 1
}
count_loc() {
[[ -f "$1" ]] && awk 'END { print NR }' "$1" || echo 0
# Check if file is a source file we care about
is_source_file() {
local file="$1"
case "$file" in
*.py|*.go|*.ts|*.tsx) return 0 ;;
*) return 1 ;;
esac
}
collect_changed_files() {
{ git diff --name-only --cached; git diff --name-only; git ls-files --others --exclude-standard; } | awk 'NF' | sort -u
# Skip non-source directories
should_skip_path() {
local file="$1"
case "$file" in
*/node_modules/*|*/.next/*|*/__pycache__/*|*/venv/*|*/.git/*) return 0 ;;
*) return 1 ;;
esac
}
main() {
local mode="changed"
[[ "${1:-}" == "--changed" ]] && mode="changed"
[[ "${1:-}" == "--all" ]] && mode="all"
case "${1:-}" in
--changed) mode="changed" ;;
--staged) mode="staged" ;;
--all) mode="all" ;;
esac
local files=()
if [[ "$mode" == "changed" ]]; then
IFS=$'\n' read -d '' -r -a files < <(collect_changed_files)
else
IFS=$'\n' read -d '' -r -a files < <(find "$ROOT_DIR" \( -name '*.py' -o -name '*.go' -o -name '*.ts' -o -name '*.tsx' \) -not -path '*/node_modules/*' -not -path '*/.next/*' -not -path '*/__pycache__/*' -not -path '*/venv/*')
fi
cd "$ROOT_DIR"
# Collect files based on mode
local tmpfile
tmpfile=$(mktemp)
trap "rm -f '$tmpfile'" EXIT
case "$mode" in
staged)
git diff --name-only --cached > "$tmpfile"
;;
changed)
{ git diff --name-only --cached 2>/dev/null; git diff --name-only 2>/dev/null; git ls-files --others --exclude-standard 2>/dev/null; } | sort -u > "$tmpfile"
;;
all)
# Use find + xargs wc for speed, then filter >500 LOC only
find . \( -name '*.py' -o -name '*.go' -o -name '*.ts' -o -name '*.tsx' \) \
-not -path '*/node_modules/*' \
-not -path '*/.next/*' \
-not -path '*/__pycache__/*' \
-not -path '*/venv/*' \
-not -path '*/.git/*' \
-exec wc -l {} + 2>/dev/null \
| awk -v max="$MAX_LOC" '$1 > max && !/total$/ { sub(/^[[:space:]]*[0-9]+[[:space:]]*\.\//, ""); print }' > "$tmpfile"
# For --all mode, we already have only the violating files
# Override the check loop to just check exemptions
local fast_violations=0
local fast_checked=0
local fast_exempted=0
while IFS= read -r file; do
[[ -z "$file" ]] && continue
[[ -f "$file" ]] || continue
if is_exempt "$file"; then
fast_exempted=$((fast_exempted + 1))
continue
fi
fast_checked=$((fast_checked + 1))
local loc
loc=$(wc -l < "$file" 2>/dev/null || echo 0)
red "VIOLATION: $file ($loc LOC > $MAX_LOC)"
fast_violations=$((fast_violations + 1))
done < "$tmpfile"
echo ""
if (( fast_violations > 0 )); then
red "$fast_violations file(s) exceed ${MAX_LOC} LOC budget. (violations: $fast_violations, exempted: $fast_exempted)"
exit 1
fi
green "LOC budget check passed. (exempted: $fast_exempted)"
exit 0
;;
esac
local failed=0
local violations=0
local checked=0
local exempted=0
for file in "${files[@]}"; do
while IFS= read -r file; do
[[ -z "$file" ]] && continue
[[ -f "$file" ]] || continue
is_exempt "$file" && continue
is_source_file "$file" || continue
should_skip_path "$file" && continue
if is_exempt "$file"; then
exempted=$((exempted + 1))
continue
fi
checked=$((checked + 1))
local loc
loc="$(count_loc "$file")"
loc=$(wc -l < "$file" 2>/dev/null || echo 0)
if (( loc > MAX_LOC )); then
red "VIOLATION: $file ($loc LOC > $MAX_LOC)"
violations=$((violations + 1))
failed=1
fi
done
done < "$tmpfile"
if (( failed )); then
red ""
red "$violations file(s) exceed ${MAX_LOC} LOC budget."
echo ""
if (( violations > 0 )); then
red "$violations file(s) exceed ${MAX_LOC} LOC budget. (checked: $checked, exempted: $exempted)"
exit 1
fi
green "LOC budget check passed."
green "LOC budget check passed. (checked: $checked, exempted: $exempted)"
exit 0
}
main "$@"