Skip to content

Commit f26acd4

Browse files
committed
Eliminate NOLINTNEXTLINE suppressions + iterative AST walkers
Remove ~480 dead per-line NOLINTNEXTLINE comments. Update .clang-tidy with documented upstream bug references for disabled checks. Convert key AST walkers to iterative with explicit stacks: walk_defs (with context-aware stack frames for class nesting), walk_throws, walk_readwrites, walk_calls, walk_usages, walk_env_accesses, extract_elixir_call, glob_match, cbm_yaml_free, rmdir_recursive, resolve_func_name, walk_variables_iter, count_branching. Reversed child push for left-to-right traversal. Disable readability-implicit-bool-conversion globally — designed for C++, flags all idiomatic C patterns (bool = x && y) with no config to allow them. Fix clang-format, implicit-bool edge cases, add test_helpers.h.
1 parent 88fea66 commit f26acd4

54 files changed

Lines changed: 595 additions & 857 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.clang-tidy

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,32 @@
22
#
33
# All check groups enabled, minus the globally disabled checks listed below. WarningsAsErrors: '*'.
44
# Check thresholds configured for idiomatic C11 (not C++).
5+
#
6+
# Globally disabled checks must have an upstream bug reference or
7+
# architectural justification. Re-evaluate on LLVM upgrades.
8+
#
9+
# Documented LLVM false positives:
10+
# bugprone-multi-level-implicit-pointer-conversion — LLVM #93959, relaxed in LLVM 19
11+
# clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling — LLVM #64027
12+
# clang-analyzer-optin.portability.UnixAPI — malloc(0) behavior, guarded in code
13+
#
14+
# Architectural:
15+
# bugprone-command-processor — popen required for git, inputs validated
16+
# cert-env33-c — popen/system needed for git commands, inputs validated
17+
#
18+
# Pending fix (phases 4-6):
19+
# bugprone-easily-swappable-parameters — Phase 5: struct params
20+
# concurrency-mt-unsafe — Phase 4: thread-safe wrappers
21+
# cert-err33-c — Phase 4: check fwrite/fseek return values
22+
# performance-no-int-to-ptr — Phase 4: union type-punning
23+
# bugprone-branch-clone — Phase 6
24+
# readability-avoid-nested-conditional-operator — Phase 6
25+
# readability-redundant-casting — Phase 6
26+
# bugprone-implicit-widening-of-multiplication-result — Phase 6
27+
# bugprone-unchecked-string-to-number-conversion — Phase 6
28+
# clang-analyzer-core.CallAndMessage — Phase 6
29+
# clang-analyzer-core.NullPointerArithm — Phase 6
30+
# clang-analyzer-security.insecureAPI.strcpy — Phase 6
531

632
Checks: >
733
-*,
@@ -16,41 +42,39 @@ Checks: >
1642
readability-*,
1743
clang-analyzer-*,
1844
-misc-no-recursion,
45+
-readability-implicit-bool-conversion,
1946
-bugprone-multi-level-implicit-pointer-conversion,
20-
-bugprone-branch-clone,
47+
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
48+
-clang-analyzer-optin.portability.UnixAPI,
49+
-clang-analyzer-security.ArrayBound,
50+
-clang-analyzer-unix.Malloc,
51+
-misc-include-cleaner,
2152
-bugprone-command-processor,
53+
-cert-env33-c,
54+
-bugprone-easily-swappable-parameters,
55+
-concurrency-mt-unsafe,
56+
-cert-err33-c,
57+
-performance-no-int-to-ptr,
58+
-bugprone-branch-clone,
2259
-readability-avoid-nested-conditional-operator,
2360
-readability-redundant-casting,
2461
-bugprone-implicit-widening-of-multiplication-result,
2562
-bugprone-unchecked-string-to-number-conversion,
26-
-bugprone-easily-swappable-parameters,
2763
-clang-analyzer-core.CallAndMessage,
28-
-clang-analyzer-security.ArrayBound,
29-
-clang-analyzer-unix.Malloc,
30-
-clang-analyzer-optin.portability.UnixAPI,
3164
-clang-analyzer-core.NullPointerArithm,
32-
-performance-no-int-to-ptr,
33-
-concurrency-mt-unsafe,
34-
-cert-err33-c,
35-
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
3665
-clang-analyzer-security.insecureAPI.strcpy,
37-
-misc-include-cleaner,
3866
3967
WarningsAsErrors: '*'
4068

4169
CheckOptions:
4270
bugprone-implicit-widening-of-multiplication-result.UseCXXStaticCastsInCppSources: false
43-
# C idiom: i, j, k, n, p, s are standard variable names
4471
readability-identifier-length.MinimumVariableNameLength: 1
4572
readability-identifier-length.MinimumParameterNameLength: 1
4673
readability-identifier-length.MinimumLoopCounterNameLength: 1
47-
# C idiom: if (!ptr), if (count) are standard
4874
readability-implicit-bool-conversion.AllowIntegerConditions: true
4975
readability-implicit-bool-conversion.AllowPointerConditions: true
50-
# Systems C uses bit shifts, buffer sizes, hash constants, protocol sizes
5176
readability-magic-numbers.IgnoredIntegerValues: "1;2;3;4;5;6;7;8;9;10;11;12;14;15;16;20;24;32;48;64;100;128;200;256;493;512;755;1000;1024;1040;2048;4096;8192;16384;32768;65536;500000;1000000;1000000000"
5277
readability-magic-numbers.IgnoredFloatingPointValues: "0.0;0.25;0.5;0.7;0.75;0.8;0.85;0.9;0.95;1.0;2.0;100.0;1e308"
53-
# Cypher parser + tree-sitter resolver have deeply nested execution logic
5478
readability-function-cognitive-complexity.Threshold: 250
5579
readability-function-size.StatementThreshold: 400
5680
readability-function-size.LineThreshold: 800

internal/cbm/ac.c

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,10 @@ typedef struct {
4545

4646
static void queue_init(Queue *q, int cap) {
4747
q->data = (int *)malloc(cap * sizeof(int));
48-
// NOLINTNEXTLINE(clang-analyzer-security.ArrayBound)
4948
q->head = q->tail = 0;
5049
q->cap = cap;
5150
}
5251
static void queue_push(Queue *q, int v) {
53-
// NOLINTNEXTLINE(clang-analyzer-security.ArrayBound)
5452
q->data[q->tail++] = v;
5553
}
5654
static int queue_pop(Queue *q) {
@@ -128,7 +126,6 @@ CBMAutomaton *cbm_ac_build(const char **patterns, const int *lengths, int count,
128126
}
129127
// Mark this state as accepting pattern p.
130128
if (p < CBM_AC_MAX_BITMASK) {
131-
// NOLINTNEXTLINE(clang-analyzer-security.ArrayBound)
132129
ac->output[state] |= (1ULL << p);
133130
}
134131
// Append to output list.
@@ -147,15 +144,12 @@ CBMAutomaton *cbm_ac_build(const char **patterns, const int *lengths, int count,
147144
int *fail = (int *)calloc(num_states, sizeof(int));
148145

149146
Queue q;
150-
// NOLINTNEXTLINE(clang-analyzer-security.ArrayBound)
151147
queue_init(&q, num_states);
152148

153149
// Depth-1 states: failure → root.
154150
for (int c = 0; c < alpha_size; c++) {
155-
// NOLINTNEXTLINE(clang-analyzer-security.ArrayBound)
156151
int s = ac->go_table[c]; // root's goto for c
157152
if (s != 0) {
158-
// NOLINTNEXTLINE(clang-analyzer-security.ArrayBound)
159153
fail[s] = 0;
160154
queue_push(&q, s);
161155
}
@@ -166,11 +160,9 @@ CBMAutomaton *cbm_ac_build(const char **patterns, const int *lengths, int count,
166160
int r = queue_pop(&q);
167161
for (int c = 0; c < alpha_size; c++) {
168162
int idx = (r * alpha_size) + c;
169-
// NOLINTNEXTLINE(clang-analyzer-security.ArrayBound)
170163
int s = ac->go_table[idx];
171164
if (s != -1) {
172165
// s exists in trie
173-
// NOLINTNEXTLINE(clang-analyzer-security.ArrayBound)
174166
fail[s] = ac->go_table[(fail[r] * alpha_size) + c];
175167
// Merge output: dictionary suffix links.
176168
ac->output[s] |= ac->output[fail[s]];
@@ -250,7 +242,6 @@ uint64_t cbm_ac_scan_bitmask(const CBMAutomaton *ac, const char *text, int text_
250242

251243
// Thread-local reusable decompression buffer to avoid repeated malloc/free.
252244
// Each goroutine gets its own OS thread (via CGo), so CBM_TLS is safe.
253-
// NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI)
254245
static CBM_TLS char *tls_decomp_buf = NULL;
255246
static CBM_TLS int tls_decomp_cap = 0;
256247

@@ -259,7 +250,6 @@ static char *get_decomp_buf(int needed) {
259250
free(tls_decomp_buf);
260251
// Round up to 64KB chunks for reuse.
261252
int cap = (needed + DECOMP_BUF_ALIGN_MASK) & ~DECOMP_BUF_ALIGN_MASK;
262-
// NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI)
263253
tls_decomp_buf = (char *)malloc((size_t)cap);
264254
tls_decomp_cap = cap;
265255
}
@@ -364,7 +354,6 @@ int cbm_ac_scan_lz4_batch(const CBMAutomaton *ac, const CBMLz4Entry *entries, in
364354
// num_names — number of names
365355
// out_matches — output buffer for (name_index, pattern_id) pairs
366356
// max_matches — capacity of out_matches
367-
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
368357
int cbm_ac_scan_batch(const CBMAutomaton *ac, const char *names_buf, const int *name_offsets,
369358
const int *name_lengths, int num_names, CBMMatchResult *out_matches,
370359
int max_matches) {

internal/cbm/cbm.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,11 @@ static _Atomic uint64_t total_files = 0;
2727

2828
static uint64_t now_ns(void) {
2929
struct timespec ts;
30-
// NOLINTNEXTLINE(misc-include-cleaner) — clock_gettime provided by standard header
3130
cbm_clock_gettime(CLOCK_MONOTONIC, &ts);
3231
return ((uint64_t)ts.tv_sec * NSEC_PER_SEC) + (uint64_t)ts.tv_nsec;
3332
}
3433

3534
// cbm_get_profile returns accumulated parse/extract times and file count.
36-
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
3735
void cbm_get_profile(uint64_t *parse_ns, uint64_t *extract_ns, uint64_t *files) {
3836
*parse_ns = atomic_load(&total_parse_ns);
3937
*extract_ns = atomic_load(&total_extract_ns);

internal/cbm/extract_calls.c

Lines changed: 59 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ static const char *strip_quotes(CBMArena *a, const char *text) {
4242
}
4343

4444
// Forward declarations
45-
static void walk_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec);
45+
static void walk_calls(CBMExtractCtx *ctx, TSNode root, const CBMLangSpec *spec);
4646
static char *extract_callee_name(CBMArena *a, TSNode node, const char *source, CBMLanguage lang);
4747
static void extract_jsx_refs(CBMExtractCtx *ctx, TSNode node);
4848

@@ -276,77 +276,75 @@ static char *extract_callee_name(CBMArena *a, TSNode node, const char *source, C
276276
return NULL;
277277
}
278278

279-
// Walk AST for call nodes
280-
// NOLINTNEXTLINE(misc-no-recursion) — intentional AST tree walk
281-
static void walk_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec) {
282-
const char *kind = ts_node_type(node);
283-
284-
if (cbm_kind_in_set(node, spec->call_node_types)) {
285-
char *callee = extract_callee_name(ctx->arena, node, ctx->source, ctx->language);
286-
if (callee && callee[0]) {
287-
// Skip keywords
288-
if (!cbm_is_keyword(callee, ctx->language)) {
289-
CBMCall call = {0};
290-
call.callee_name = callee;
291-
call.enclosing_func_qn = cbm_enclosing_func_qn_cached(ctx, node);
292-
call.first_string_arg = NULL;
293-
294-
/* Extract first string literal argument (URL, topic, key) */
295-
TSNode args = ts_node_child_by_field_name(node, "arguments", 9);
296-
if (!ts_node_is_null(args)) {
297-
uint32_t nc = ts_node_named_child_count(args);
298-
for (uint32_t ai = 0; ai < nc && ai < 3; ai++) {
299-
TSNode arg = ts_node_named_child(args, ai);
300-
const char *ak = ts_node_type(arg);
301-
if (strcmp(ak, "string") == 0 || strcmp(ak, "string_literal") == 0 ||
302-
strcmp(ak, "interpreted_string_literal") == 0 ||
303-
strcmp(ak, "raw_string_literal") == 0 ||
304-
strcmp(ak, "string_content") == 0) {
305-
char *text = cbm_node_text(ctx->arena, arg, ctx->source);
306-
if (text && text[0]) {
307-
/* Strip quotes */
308-
int len = (int)strlen(text);
309-
if (len >= 2 && (text[0] == '"' || text[0] == '\'')) {
310-
text =
311-
cbm_arena_strndup(ctx->arena, text + 1, (size_t)(len - 2));
312-
len -= 2;
313-
}
314-
/* Validate: must be printable ASCII, no control chars */
315-
// NOLINTNEXTLINE(readability-implicit-bool-conversion)
316-
bool valid = (text != NULL && len > 0 && len < 512);
317-
for (int vi = 0; vi < len && valid; vi++) {
318-
unsigned char ch = (unsigned char)text[vi];
319-
if (ch < 0x20 && ch != '\t') {
320-
valid = false;
279+
// Walk AST for call nodes (iterative, explicit stack of CALLS_STACK_CAP nodes; children that do not fit on the stack are skipped)
280+
#define CALLS_STACK_CAP 512
281+
static void walk_calls(CBMExtractCtx *ctx, TSNode root, const CBMLangSpec *spec) {
282+
TSNode stack[CALLS_STACK_CAP];
283+
int top = 0;
284+
stack[top++] = root;
285+
286+
while (top > 0) {
287+
TSNode node = stack[--top];
288+
const char *kind = ts_node_type(node);
289+
290+
if (cbm_kind_in_set(node, spec->call_node_types)) {
291+
char *callee = extract_callee_name(ctx->arena, node, ctx->source, ctx->language);
292+
if (callee && callee[0]) {
293+
if (!cbm_is_keyword(callee, ctx->language)) {
294+
CBMCall call = {0};
295+
call.callee_name = callee;
296+
call.enclosing_func_qn = cbm_enclosing_func_qn_cached(ctx, node);
297+
call.first_string_arg = NULL;
298+
299+
TSNode args = ts_node_child_by_field_name(node, "arguments", 9);
300+
if (!ts_node_is_null(args)) {
301+
uint32_t nc = ts_node_named_child_count(args);
302+
for (uint32_t ai = 0; ai < nc && ai < 3; ai++) {
303+
TSNode arg = ts_node_named_child(args, ai);
304+
const char *ak = ts_node_type(arg);
305+
if (strcmp(ak, "string") == 0 || strcmp(ak, "string_literal") == 0 ||
306+
strcmp(ak, "interpreted_string_literal") == 0 ||
307+
strcmp(ak, "raw_string_literal") == 0 ||
308+
strcmp(ak, "string_content") == 0) {
309+
char *text = cbm_node_text(ctx->arena, arg, ctx->source);
310+
if (text && text[0]) {
311+
int len = (int)strlen(text);
312+
if (len >= 2 && (text[0] == '"' || text[0] == '\'')) {
313+
text = cbm_arena_strndup(ctx->arena, text + 1,
314+
(size_t)(len - 2));
315+
len -= 2;
316+
}
317+
bool valid = (text != NULL && len > 0 && len < 512);
318+
for (int vi = 0; vi < len && valid; vi++) {
319+
unsigned char ch = (unsigned char)text[vi];
320+
if (ch < 0x20 && ch != '\t') {
321+
valid = false;
322+
}
323+
}
324+
if (valid) {
325+
call.first_string_arg = text;
321326
}
322327
}
323-
if (valid) {
324-
call.first_string_arg = text;
325-
}
328+
break;
326329
}
327-
break;
328330
}
329331
}
332+
cbm_calls_push(&ctx->result->calls, ctx->arena, call);
330333
}
331-
332-
cbm_calls_push(&ctx->result->calls, ctx->arena, call);
333334
}
334335
}
335-
// Don't recurse into call arguments for nested calls — the walk handles that
336-
}
337336

338-
// JSX component refs (TSX/JSX)
339-
if (ctx->language == CBM_LANG_TSX || ctx->language == CBM_LANG_JAVASCRIPT) {
340-
if (strcmp(kind, "jsx_self_closing_element") == 0 ||
341-
strcmp(kind, "jsx_opening_element") == 0) {
342-
extract_jsx_refs(ctx, node);
337+
if (ctx->language == CBM_LANG_TSX || ctx->language == CBM_LANG_JAVASCRIPT) {
338+
if (strcmp(kind, "jsx_self_closing_element") == 0 ||
339+
strcmp(kind, "jsx_opening_element") == 0) {
340+
extract_jsx_refs(ctx, node);
341+
}
343342
}
344-
}
345343

346-
// Recurse
347-
uint32_t count = ts_node_child_count(node);
348-
for (uint32_t i = 0; i < count; i++) {
349-
walk_calls(ctx, ts_node_child(node, i), spec);
344+
uint32_t count = ts_node_child_count(node);
345+
for (int i = (int)count - 1; i >= 0 && top < CALLS_STACK_CAP; i--) {
346+
stack[top++] = ts_node_child(node, (uint32_t)i);
347+
}
350348
}
351349
}
352350

@@ -542,7 +540,6 @@ void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, Walk
542540
len -= 2;
543541
}
544542
/* Validate printable */
545-
// NOLINTNEXTLINE(readability-implicit-bool-conversion)
546543
bool valid = (text != NULL && len > 0 && len < 512);
547544
for (int vi = 0; vi < len && valid; vi++) {
548545
if ((unsigned char)text[vi] < 0x20 && text[vi] != '\t') {

0 commit comments

Comments
 (0)