breakpilot-compliance/scripts/check-loc.sh

#!/usr/bin/env bash
# check-loc.sh — File-size budget enforcer for breakpilot-compliance.
#
# Soft target: 300 LOC. Hard cap: 500 LOC.
#
# Usage:
#   scripts/check-loc.sh                    # scan whole repo, respect exceptions
#   scripts/check-loc.sh --changed          # only files changed vs origin/main
#   scripts/check-loc.sh path/to/file.py    # check specific files
#   scripts/check-loc.sh --json             # machine-readable output
#
# Exit codes:
#   0 — clean (no hard violations)
#   1 — at least one file exceeds the hard cap (500)
#   2 — invalid invocation
#
# Behavior:
#   - Skips test files, generated files, vendor dirs, node_modules, .git, dist, build,
#     .next, __pycache__, migrations, and anything matching .claude/rules/loc-exceptions.txt.
#   - Blank and comment-only lines are NOT filtered out — we count raw lines so the
#     rule is unambiguous. If you want to game it with blank lines, you're missing the point.

set -euo pipefail

# Line budgets: files above SOFT are reported as warnings, files above HARD
# make the run fail.
readonly SOFT=300
readonly HARD=500
# Parent directory of the directory that contains this script.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# File listing repo-relative paths that are exempt from the budget.
EXCEPTIONS_FILE="$REPO_ROOT/.claude/rules/loc-exceptions.txt"

CHANGED_ONLY=0   # set to 1 by --changed
JSON=0           # set to 1 by --json
TARGETS=()       # explicit file arguments, in command-line order

# Print the comment header that follows the shebang line of this script.
# Stopping at the first non-comment line (instead of a fixed line range)
# keeps --help complete when the header grows or shrinks.
print_help() {
  awk '
    /^#!/ && !seen { seen = 1; next }
    seen && /^#/   { print; next }
    seen           { exit }
  ' "$0"
}

# Flags may appear in any position; anything not starting with "-" is a target.
for arg in "$@"; do
  case "$arg" in
    --changed) CHANGED_ONLY=1 ;;
    --json)    JSON=1 ;;
    -h|--help) print_help; exit 0 ;;
    -*) echo "unknown flag: $arg" >&2; exit 2 ;;
    *)  TARGETS+=("$arg") ;;
  esac
done

# Tell whether a path is exempt from the budget purely because of its name
# or one of its directory components.
# Arguments: $1 - path to test
# Returns:   0 when the path matches any exclusion pattern, 1 otherwise
is_excluded() {
  local candidate="$1"
  local glob
  local -a skip_globs=(
    # directory components
    '*/node_modules/*' '*/.next/*' '*/.git/*' '*/dist/*' '*/build/*'
    '*/__pycache__/*' '*/vendor/*'
    '*/migrations/*' '*/alembic/versions/*'
    # test files and test directories
    '*_test.go' '*.test.ts' '*.test.tsx' '*.spec.ts' '*.spec.tsx'
    '*/tests/*' '*/test/*'
    # documentation, configuration and lock-file extensions
    '*.md' '*.json' '*.yaml' '*.yml' '*.lock' '*.sum' '*.mod' '*.toml'
    '*.cfg' '*.ini'
    # image, document and font extensions
    '*.svg' '*.png' '*.jpg' '*.jpeg' '*.gif' '*.ico' '*.pdf' '*.woff'
    '*.woff2' '*.ttf'
    # generated-code naming patterns
    '*.generated.*' '*.gen.*' '*_pb.go' '*_pb2.py' '*.pb.go'
  )
  for glob in "${skip_globs[@]}"; do
    # The right-hand side is left unquoted on purpose so it is matched as a
    # glob pattern rather than a literal string.
    # shellcheck disable=SC2053
    if [[ "$candidate" == $glob ]]; then
      return 0
    fi
  done
  return 1
}

# Report whether a file is listed in the exceptions file.
# Globals:   EXCEPTIONS_FILE (read), REPO_ROOT (read)
# Arguments: $1 - file path; a leading "$REPO_ROOT/" is stripped before lookup
# Returns:   0 if a line of EXCEPTIONS_FILE equals the stripped path exactly;
#            non-zero if it is not listed or the exceptions file is missing
is_in_exceptions() {
  [[ -f "$EXCEPTIONS_FILE" ]] || return 1
  # REPO_ROOT is quoted inside the expansion so glob characters in the
  # checkout path (e.g. "[" or "*") are stripped literally, not as a pattern.
  local rel=${1#"$REPO_ROOT"/}
  # -F -x: literal, whole-line match. "--" stops a path that begins with "-"
  # from being parsed as grep options.
  grep -Fxq -- "$rel" "$EXCEPTIONS_FILE"
}

# Print the candidate paths to check, one per line.
# Globals:   TARGETS (read), CHANGED_ONLY (read), REPO_ROOT (read)
# Outputs:   - the explicit TARGETS, if any were given; otherwise
#            - with CHANGED_ONLY set: files added or modified on
#              origin/main...HEAD, falling back to a diff against HEAD when
#              that command fails (its stderr is discarded); otherwise
#            - every file tracked by git.
# Returns:   status of the last command run
collect_targets() {
  # core.quotePath=false makes git print non-ASCII file names verbatim.
  # With the default setting they are emitted C-quoted ("\303\244.py"),
  # which no longer names a file on disk.
  local -a git_cmd=(git -C "$REPO_ROOT" -c core.quotePath=false)
  if (( ${#TARGETS[@]} > 0 )); then
    printf '%s\n' "${TARGETS[@]}"
  elif (( CHANGED_ONLY )); then
    "${git_cmd[@]}" diff --name-only --diff-filter=AM origin/main...HEAD 2>/dev/null \
      || "${git_cmd[@]}" diff --name-only --diff-filter=AM HEAD
  else
    "${git_cmd[@]}" ls-files
  fi
}

# Each entry is "LOC<TAB>path", with the path as printed by collect_targets.
violations_hard=()
violations_soft=()

# Classify every candidate path against the soft and hard budgets.
while IFS= read -r f; do
  [[ -n "$f" ]] || continue
  # Relative paths are resolved against the repository root.
  if [[ "$f" == /* ]]; then
    abs="$f"
  else
    abs="$REPO_ROOT/$f"
  fi
  # Skip anything that is not a regular file on disk.
  [[ -f "$abs" ]] || continue
  is_excluded "$abs" && continue
  is_in_exceptions "$abs" && continue
  # grep -c '' counts every line, including a final line with no trailing
  # newline (wc -l counts newline characters and would miss that line).
  # grep exits 1 when the count is 0 (empty file); only other failures abort.
  loc=$(LC_ALL=C grep -c '' -- "$abs") || (( $? == 1 ))
  if (( loc > HARD )); then
    violations_hard+=("$loc"$'\t'"$f")
  elif (( loc > SOFT )); then
    violations_soft+=("$loc"$'\t'"$f")
  fi
done < <(collect_targets)

# Escape a string for use inside a JSON string literal.
# Handles backslash, double quote and tab; other control characters are
# passed through unchanged.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout
json_escape() {
  printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e $'s/\t/\\\\t/g'
}

# Print violation records as comma-separated JSON objects (no brackets).
# Arguments: zero or more "LOC<TAB>path" records
emit_json_entries() {
  local tab=$'\t' sep='' rec count file
  for rec in "$@"; do
    count=${rec%%"$tab"*}
    file=${rec#*"$tab"}
    printf '%s{"loc":%s,"path":"%s"}' "$sep" "$count" "$(json_escape "$file")"
    sep=','
  done
}

if (( JSON )); then
  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array. A plain
  # "${arr[@]}" aborts under `set -u` on bash older than 4.4.
  printf '{"hard":['
  emit_json_entries ${violations_hard[@]+"${violations_hard[@]}"}
  printf '],"soft":['
  emit_json_entries ${violations_soft[@]+"${violations_soft[@]}"}
  printf ']}\n'
else
  if (( ${#violations_soft[@]} > 0 )); then
    echo "::warning:: ${#violations_soft[@]} file(s) exceed soft target ($SOFT lines):"
    # Largest files first.
    printf '  %s\n' "${violations_soft[@]}" | sort -rn
  fi
  if (( ${#violations_hard[@]} > 0 )); then
    echo "::error:: ${#violations_hard[@]} file(s) exceed HARD CAP ($HARD lines) — split required:"
    printf '  %s\n' "${violations_hard[@]}" | sort -rn
    echo
    echo "If a file legitimately must exceed $HARD lines (generated code, large data tables),"
    echo "add it to .claude/rules/loc-exceptions.txt with a one-line rationale comment above it."
  fi
fi

# The status of this last command becomes the script's exit status:
# 0 when no file exceeds the hard cap, 1 otherwise.
(( ${#violations_hard[@]} == 0 ))