#!/usr/bin/env bash
set -euo pipefail

# audit-grammar-security.sh — Scan vendored tree-sitter grammar scanner
# sources for dangerous patterns that could indicate malicious code.
#
# Grammars are third-party C source compiled into our binary. parser.c files
# are auto-generated (low risk), but scanner.c files are hand-written C with
# access to memory allocation and input text.
#
# For every immediate subdirectory of the grammar directory, the files
# scanner.c, scanner.cc, *.h and *.inc directly inside it are checked.
# Headers in nested directories (e.g. tree_sitter/) are never visited because
# only direct children are globbed.
#
# Findings are either WARN (reported, does not fail the run) or BLOCK
# (reported, run exits 1).
#
# Usage:   scripts/audit-grammar-security.sh [directory]
# Default: internal/cbm/vendored/grammars/
# Exit:    0 if no grammar is blocked; 1 if any grammar is blocked or the
#          directory does not exist.

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
GRAMMAR_DIR="${1:-$ROOT/internal/cbm/vendored/grammars}"

if [[ ! -d "$GRAMMAR_DIR" ]]; then
  echo "ERROR: directory not found: $GRAMMAR_DIR" >&2
  exit 1
fi

echo "=== Grammar Security Audit ==="
echo "Scanning: $GRAMMAR_DIR"
echo ""

TOTAL=0
WARNED=0
BLOCKED=0

# Safe includes that scanners legitimately use.
# NOTE: no check in this script references this list; it is kept as a record
# of the headers considered acceptable.
# shellcheck disable=SC2034
SAFE_INCLUDES='string\.h|stdint\.h|stdbool\.h|stdlib\.h|wctype\.h|stdio\.h|stddef\.h|limits\.h|assert\.h|ctype\.h|wchar\.h|math\.h|stdalign\.h|stdarg\.h|float\.h|inttypes\.h'

# Dangerous includes
DANGER_INCLUDES='unistd\.h|sys/|netdb\.h|dlfcn\.h|signal\.h|spawn\.h|pthread\.h|fcntl\.h|dirent\.h|termios\.h|arpa/|netinet/'

# Full include matcher. C permits whitespace between '#' and 'include', and a
# quoted include falls back to the system search path, so both spellings are
# matched to avoid trivial evasion of the check.
DANGER_INCLUDE_RE="#\s*include\s*[<\"]($DANGER_INCLUDES)"

# Dangerous function calls
DANGER_CALLS='system\s*\(|exec[lvpe]+\s*\(|popen\s*\(|fopen\s*\(|fwrite\s*\(|socket\s*\(|getenv\s*\(|fork\s*\(|dlopen\s*\(|connect\s*\(|bind\s*\(|listen\s*\(|accept\s*\(|sendto\s*\(|recvfrom\s*\(|mmap\s*\(|mprotect\s*\('

# Suspicious patterns
SUSPICIOUS='__attribute__\s*\(\s*\(\s*constructor|__attribute__\s*\(\s*\(\s*destructor|asm\s*\(|__asm__|__asm\b'

# Base64-like string literals: 60+ characters from the base64 alphabet,
# optionally followed by up to two '=' padding characters.
ENCODED_DATA_RE='"[A-Za-z0-9+/]{60,}={0,2}"'

# Per-grammar state, reset at the top of every outer-loop iteration.
issues=""     # newline-terminated finding lines for the current grammar
has_block=0   # 1 once any BLOCK-severity finding was recorded

#######################################
# Grep one file for one pattern and append a finding line per match.
# Globals:   issues (appended), has_block (set to 1 on a BLOCK match)
# Arguments: $1 - severity tag, WARN or BLOCK
#            $2 - path of the file to scan
#            $3 - human-readable label for the finding
#            $4 - extended regular expression passed to grep -E
# Returns:   0 always; a grep failure is recorded as a WARN finding
#######################################
record_matches() {
  local severity=$1 file=$2 label=$3 pattern=$4
  local shown=${file##*/}
  local matches line
  local status=0

  # grep exits 1 for "no match" and >1 for a real error; only the former is
  # a clean result, so capture the status instead of discarding it.
  matches=$(grep -nE -- "$pattern" "$file" 2>/dev/null) || status=$?
  if ((status > 1)); then
    issues+=" WARN ${shown}: ${label}: scan failed (grep exit ${status})"$'\n'
    return 0
  fi
  [[ -n "$matches" ]] || return 0

  while IFS= read -r line; do
    issues+=" ${severity} ${shown}: ${label}: ${line}"$'\n'
  done <<<"$matches"

  if [[ "$severity" == "BLOCK" ]]; then
    has_block=1
  fi
  return 0
}

for grammar_dir in "$GRAMMAR_DIR"/*/; do
  # When there are no subdirectories the glob stays literal; skip it rather
  # than counting a phantom grammar named "*" as scanned and clean.
  [[ -d "$grammar_dir" ]] || continue

  name=$(basename "$grammar_dir")
  TOTAL=$((TOTAL + 1))
  issues=""
  has_block=0

  for src in "$grammar_dir"scanner.c "$grammar_dir"scanner.cc "$grammar_dir"*.h "$grammar_dir"*.inc; do
    [[ -f "$src" ]] || continue

    # A file that cannot be read cannot be vouched for; say so once instead
    # of letting four silent grep failures make it look clean.
    if [[ ! -r "$src" ]]; then
      issues+=" WARN ${src##*/}: unreadable, not scanned"$'\n'
      continue
    fi

    record_matches WARN "$src" "dangerous include" "$DANGER_INCLUDE_RE"
    record_matches BLOCK "$src" "dangerous call" "$DANGER_CALLS"
    record_matches BLOCK "$src" "suspicious pattern" "$SUSPICIOUS"
    record_matches WARN "$src" "possible encoded data" "$ENCODED_DATA_RE"
  done

  [[ -n "$issues" ]] || continue

  # Severity comes from the flag, not from searching the report text, so a
  # matched source line that happens to contain "BLOCK" cannot escalate it.
  if ((has_block)); then
    echo "BLOCK $name:"
    BLOCKED=$((BLOCKED + 1))
  else
    echo "WARN $name:"
    WARNED=$((WARNED + 1))
  fi
  # printf '%s' prints matched C source verbatim; echo -e would expand
  # escapes such as \n or \t that appear inside the reported lines.
  printf '%s\n' "$issues"
done

echo "────────────────────────────────────────────"
echo " Scanned: $TOTAL grammars"
echo " Clean: $((TOTAL - WARNED - BLOCKED))"
echo " Warned: $WARNED"
echo " Blocked: $BLOCKED"
echo "────────────────────────────────────────────"

if ((BLOCKED > 0)); then
  echo ""
  echo "FAILED: $BLOCKED grammar(s) have dangerous patterns. Review before vendoring."
  exit 1
fi

echo ""
if ((WARNED > 0)); then
  echo "PASSED: No blocking patterns found; $WARNED grammar(s) have warnings to review."
else
  echo "PASSED: No dangerous patterns found."
fi
exit 0