Skip to content

Commit f92fd66

Browse files
DeusData, kingchenc, and mariomeyer
committed
Port FastAPI Depends() + DLL resolve edge tracking (PR #66)
- FastAPI Depends(func_ref): scans Python function signatures for Depends() patterns and creates CALLS edges to the dependency function. Without this, auth/DI functions appear as dead code (in_degree=0).
- DLL resolve: scans C/C++ source for GetProcAddress/dlsym/Resolve patterns and creates CALLS edges to synthetic stub nodes, enabling call graph tracking across DLL boundaries.
- Extension scoping (#28) was already ported in 6a2b1f5.
- SQL size guard (#62) not needed: our C workers use 8MB stacks.

Tests: 3 new (httplink_laravel_path_filter, pipeline_fastapi_depends_edges, pipeline_dll_resolve_edges). Total: 2041.

Co-Authored-By: kingchenc <kingchenc@users.noreply.github.com>
Co-Authored-By: mariomeyer <mariomeyer@users.noreply.github.com>
1 parent 93d332d commit f92fd66

4 files changed

Lines changed: 479 additions & 0 deletions

File tree

src/pipeline/pass_calls.c

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include "foundation/compat.h"
1717
#include "cbm.h"
1818

19+
#include "foundation/compat_regex.h"
20+
1921
#include <stdbool.h>
2022
#include <stdio.h>
2123
#include <stdlib.h>
@@ -289,5 +291,312 @@ int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *file
289291
cbm_log_info("pass.done", "pass", "calls", "total", itoa_log(total_calls), "resolved",
290292
itoa_log(resolved), "unresolved", itoa_log(unresolved), "errors",
291293
itoa_log(errors));
294+
295+
/* Additional pattern-based edge passes run after normal call resolution */
296+
cbm_pipeline_pass_fastapi_depends(ctx, files, file_count);
297+
cbm_pipeline_pass_dll_resolve(ctx, files, file_count);
298+
292299
return 0;
293300
}
301+
302+
/* ── FastAPI Depends() tracking ──────────────────────────────────── */
303+
/* Scans Python function signatures for Depends(func_ref) patterns and
304+
* creates CALLS edges from the endpoint to the dependency function.
305+
* Without this, FastAPI auth/DI functions appear as dead code (in_degree=0). */
306+
307+
// NOLINTNEXTLINE(misc-include-cleaner) — cbm_file_info_t provided by standard header
308+
void cbm_pipeline_pass_fastapi_depends(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files,
309+
int file_count) {
310+
cbm_regex_t depends_re;
311+
if (cbm_regcomp(&depends_re, "Depends\\(([A-Za-z_][A-Za-z0-9_.]*)", CBM_REG_EXTENDED) != 0) {
312+
return;
313+
}
314+
315+
int edge_count = 0;
316+
for (int i = 0; i < file_count; i++) {
317+
if (files[i].language != CBM_LANG_PYTHON) {
318+
continue;
319+
}
320+
if (cbm_pipeline_check_cancel(ctx)) {
321+
break;
322+
}
323+
324+
/* Check if file has Depends call in cached extraction */
325+
CBMFileResult *result = ctx->result_cache ? ctx->result_cache[i] : NULL;
326+
if (!result) {
327+
continue;
328+
}
329+
bool has_depends = false;
330+
for (int c = 0; c < result->calls.count; c++) {
331+
if (result->calls.items[c].callee_name &&
332+
strcmp(result->calls.items[c].callee_name, "Depends") == 0) {
333+
has_depends = true;
334+
break;
335+
}
336+
}
337+
if (!has_depends) {
338+
continue;
339+
}
340+
341+
/* Read source and scan for Depends(func_ref) in function signatures */
342+
int source_len = 0;
343+
char *source = read_file(files[i].path, &source_len);
344+
if (!source) {
345+
continue;
346+
}
347+
348+
char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, files[i].rel_path);
349+
350+
/* Build import map for alias resolution */
351+
const char **imp_keys = NULL;
352+
const char **imp_vals = NULL;
353+
int imp_count = 0;
354+
build_import_map(ctx, files[i].rel_path, result, &imp_keys, &imp_vals, &imp_count);
355+
356+
for (int d = 0; d < result->defs.count; d++) {
357+
CBMDefinition *def = &result->defs.items[d];
358+
if (!def->qualified_name || def->start_line == 0) {
359+
continue;
360+
}
361+
if (strcmp(def->label, "Function") != 0 && strcmp(def->label, "Method") != 0) {
362+
continue;
363+
}
364+
365+
/* Extract function signature (def line through ~15 lines for multi-line sigs) */
366+
int sig_end_line = (int)def->start_line + 15;
367+
if (def->end_line > 0 && sig_end_line > (int)def->end_line) {
368+
sig_end_line = (int)def->end_line;
369+
}
370+
371+
/* Find signature region in source */
372+
const char *p = source;
373+
int line = 1;
374+
while (*p && line < def->start_line) {
375+
if (*p == '\n') {
376+
line++;
377+
}
378+
p++;
379+
}
380+
const char *sig_start = p;
381+
while (*p && line < sig_end_line) {
382+
if (*p == '\n') {
383+
line++;
384+
}
385+
p++;
386+
/* Stop at closing paren + colon (end of Python signature) */
387+
if (p > sig_start + 1 && p[-1] == ':' && p[-2] == ')') {
388+
break;
389+
}
390+
}
391+
size_t sig_len = (size_t)(p - sig_start);
392+
char *sig = malloc(sig_len + 1);
393+
if (!sig) {
394+
continue;
395+
}
396+
memcpy(sig, sig_start, sig_len);
397+
sig[sig_len] = '\0';
398+
399+
/* Match Depends(func_ref) patterns */
400+
cbm_regmatch_t match[2];
401+
const char *scan = sig;
402+
while (cbm_regexec(&depends_re, scan, 2, match, 0) == 0) {
403+
int ref_len = match[1].rm_eo - match[1].rm_so;
404+
char func_ref[256];
405+
if (ref_len >= (int)sizeof(func_ref)) {
406+
ref_len = (int)sizeof(func_ref) - 1;
407+
}
408+
memcpy(func_ref, scan + match[1].rm_so, (size_t)ref_len);
409+
func_ref[ref_len] = '\0';
410+
411+
/* Resolve through registry */
412+
cbm_resolution_t res = cbm_registry_resolve(ctx->registry, func_ref, module_qn,
413+
imp_keys, imp_vals, imp_count);
414+
if (res.qualified_name && res.qualified_name[0] != '\0') {
415+
const cbm_gbuf_node_t *src_node =
416+
cbm_gbuf_find_by_qn(ctx->gbuf, def->qualified_name);
417+
const cbm_gbuf_node_t *tgt_node =
418+
cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name);
419+
if (src_node && tgt_node && src_node->id != tgt_node->id) {
420+
cbm_gbuf_insert_edge(ctx->gbuf, src_node->id, tgt_node->id, "CALLS",
421+
"{\"confidence\":0.95,\"strategy\":\"fastapi_depends\""
422+
"}");
423+
edge_count++;
424+
}
425+
}
426+
scan += match[0].rm_eo;
427+
}
428+
free(sig);
429+
}
430+
431+
free(module_qn);
432+
free_import_map(imp_keys, imp_vals, imp_count);
433+
free(source);
434+
}
435+
436+
cbm_regfree(&depends_re);
437+
if (edge_count > 0) {
438+
cbm_log_info("pass.fastapi_depends", "edges", itoa_log(edge_count));
439+
}
440+
}
441+
442+
/* ── DLL resolve tracking ────────────────────────────────────────── */
443+
/* Scans C/C++ function source for dynamic DLL resolution patterns
444+
* (GetProcAddress, dlsym, Resolve) and creates CALLS edges to synthetic
445+
* stub nodes, enabling call graph tracking across DLL boundaries. */
446+
447+
// NOLINTNEXTLINE(misc-include-cleaner) — cbm_file_info_t provided by standard header
448+
void cbm_pipeline_pass_dll_resolve(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files,
449+
int file_count) {
450+
cbm_regex_t getproc_re;
451+
cbm_regex_t dlsym_re;
452+
cbm_regex_t resolve_re;
453+
454+
if (cbm_regcomp(&getproc_re,
455+
"GetProcAddress[AW]?\\([^,]+,[[:space:]]*\"([A-Za-z_][A-Za-z0-9_]*)\"",
456+
CBM_REG_EXTENDED) != 0) {
457+
return;
458+
}
459+
if (cbm_regcomp(&dlsym_re, "dlsym\\([^,]+,[[:space:]]*\"([A-Za-z_][A-Za-z0-9_]*)\"",
460+
CBM_REG_EXTENDED) != 0) {
461+
cbm_regfree(&getproc_re);
462+
return;
463+
}
464+
if (cbm_regcomp(&resolve_re, "[.>]Resolve\\([[:space:]]*\"([A-Za-z_][A-Za-z0-9_]*)\"",
465+
CBM_REG_EXTENDED) != 0) {
466+
cbm_regfree(&getproc_re);
467+
cbm_regfree(&dlsym_re);
468+
return;
469+
}
470+
471+
cbm_regex_t *patterns[] = {&getproc_re, &dlsym_re, &resolve_re};
472+
473+
int edge_count = 0;
474+
for (int i = 0; i < file_count; i++) {
475+
if (files[i].language != CBM_LANG_C && files[i].language != CBM_LANG_CPP) {
476+
continue;
477+
}
478+
if (cbm_pipeline_check_cancel(ctx)) {
479+
break;
480+
}
481+
482+
CBMFileResult *result = ctx->result_cache ? ctx->result_cache[i] : NULL;
483+
if (!result) {
484+
continue;
485+
}
486+
487+
/* Early bail: check if any call targets a DLL resolution function */
488+
bool has_dll_call = false;
489+
for (int c = 0; c < result->calls.count; c++) {
490+
const char *name = result->calls.items[c].callee_name;
491+
if (!name) {
492+
continue;
493+
}
494+
if (strcmp(name, "GetProcAddress") == 0 || strcmp(name, "GetProcAddressA") == 0 ||
495+
strcmp(name, "GetProcAddressW") == 0 || strcmp(name, "dlsym") == 0 ||
496+
strstr(name, "Resolve") != NULL) {
497+
has_dll_call = true;
498+
break;
499+
}
500+
}
501+
if (!has_dll_call) {
502+
continue;
503+
}
504+
505+
int source_len = 0;
506+
char *source = read_file(files[i].path, &source_len);
507+
if (!source) {
508+
continue;
509+
}
510+
511+
char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, files[i].rel_path);
512+
513+
for (int d = 0; d < result->defs.count; d++) {
514+
CBMDefinition *def = &result->defs.items[d];
515+
if (!def->qualified_name || def->start_line == 0 || def->end_line == 0) {
516+
continue;
517+
}
518+
if (strcmp(def->label, "Function") != 0 && strcmp(def->label, "Method") != 0) {
519+
continue;
520+
}
521+
522+
/* Extract function body from source */
523+
const char *p = source;
524+
int line = 1;
525+
while (*p && line < def->start_line) {
526+
if (*p == '\n') {
527+
line++;
528+
}
529+
p++;
530+
}
531+
const char *body_start = p;
532+
while (*p && line < def->end_line) {
533+
if (*p == '\n') {
534+
line++;
535+
}
536+
p++;
537+
}
538+
size_t body_len = (size_t)(p - body_start);
539+
char *body = malloc(body_len + 1);
540+
if (!body) {
541+
continue;
542+
}
543+
memcpy(body, body_start, body_len);
544+
body[body_len] = '\0';
545+
546+
/* Match each DLL resolution pattern */
547+
for (int pi = 0; pi < 3; pi++) {
548+
cbm_regmatch_t match[2];
549+
const char *scan = body;
550+
while (cbm_regexec(patterns[pi], scan, 2, match, 0) == 0) {
551+
int fn_len = match[1].rm_eo - match[1].rm_so;
552+
char func_name[256];
553+
if (fn_len >= (int)sizeof(func_name)) {
554+
fn_len = (int)sizeof(func_name) - 1;
555+
}
556+
memcpy(func_name, scan + match[1].rm_so, (size_t)fn_len);
557+
func_name[fn_len] = '\0';
558+
559+
/* Create edge to synthetic DLL stub node */
560+
char target_qn[512];
561+
snprintf(target_qn, sizeof(target_qn), "%s.dll.external.%s", module_qn,
562+
func_name);
563+
564+
const cbm_gbuf_node_t *src_node =
565+
cbm_gbuf_find_by_qn(ctx->gbuf, def->qualified_name);
566+
if (src_node) {
567+
/* Create stub node if it doesn't exist */
568+
if (!cbm_gbuf_find_by_qn(ctx->gbuf, target_qn)) {
569+
char stub_props[256];
570+
snprintf(stub_props, sizeof(stub_props),
571+
"{\"stub\":true,\"source\":\"dll_resolve\","
572+
"\"dll_function\":\"%s\"}",
573+
func_name);
574+
cbm_gbuf_upsert_node(ctx->gbuf, "Function", func_name, target_qn,
575+
files[i].rel_path, (int)def->start_line,
576+
(int)def->start_line, stub_props);
577+
}
578+
const cbm_gbuf_node_t *tgt_node = cbm_gbuf_find_by_qn(ctx->gbuf, target_qn);
579+
if (tgt_node) {
580+
cbm_gbuf_insert_edge(ctx->gbuf, src_node->id, tgt_node->id, "CALLS",
581+
"{\"confidence\":0.85,\"strategy\":\"dll_resolve\""
582+
"}");
583+
edge_count++;
584+
}
585+
}
586+
scan += match[0].rm_eo;
587+
}
588+
}
589+
free(body);
590+
}
591+
592+
free(module_qn);
593+
free(source);
594+
}
595+
596+
cbm_regfree(&getproc_re);
597+
cbm_regfree(&dlsym_re);
598+
cbm_regfree(&resolve_re);
599+
if (edge_count > 0) {
600+
cbm_log_info("pass.dll_resolve", "edges", itoa_log(edge_count));
601+
}
602+
}

src/pipeline/pipeline_internal.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,12 @@ int cbm_pipeline_pass_definitions(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t
355355

356356
int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count);
357357

358+
/* Sub-passes called from pass_calls: pattern-based edge extraction.
 * Both take the pipeline context, the file array and its length, and
 * return nothing. */
359+
/* Python files: scans function signatures for Depends(func_ref) and adds
 * CALLS edges from the function to the resolved dependency. */
void cbm_pipeline_pass_fastapi_depends(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files,
360+
int file_count);
361+
/* C/C++ files: scans function bodies for GetProcAddress/dlsym/Resolve calls
 * with a string-literal symbol and adds CALLS edges to synthetic stub nodes. */
void cbm_pipeline_pass_dll_resolve(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files,
362+
int file_count);
363+
358364
int cbm_pipeline_pass_usages(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count);
359365

360366
int cbm_pipeline_pass_semantic(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files,

tests/test_httplink.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,37 @@ TEST(httplink_linker_same_service_skip) {
780780
PASS();
781781
}
782782

783+
/* ── Laravel path filter: reject $ and : in matched paths ──────── */
784+
785+
TEST(httplink_laravel_path_filter) {
786+
cbm_route_handler_t routes[8];
787+
int n;
788+
789+
/* Cache key patterns should be filtered (contain $ or :) */
790+
n = cbm_extract_laravel_routes("fn", "proj.fn",
791+
"Cache::get('article:{$this->id}:image')", routes, 8);
792+
ASSERT_EQ(n, 0);
793+
794+
n = cbm_extract_laravel_routes("fn", "proj.fn",
795+
"Route::get(\"cache:$key\", fn() => null)", routes, 8);
796+
ASSERT_EQ(n, 0);
797+
798+
/* Valid routes should still pass (Laravel uses {param} not $param) */
799+
n = cbm_extract_laravel_routes("fn", "proj.fn",
800+
"Route::get('/api/users/{id}', 'UserController@show')", routes,
801+
8);
802+
ASSERT_EQ(n, 1);
803+
ASSERT_STR_EQ(routes[0].path, "/api/users/{id}");
804+
805+
/* Route with no special chars also passes */
806+
n = cbm_extract_laravel_routes("fn", "proj.fn",
807+
"Route::post('/api/login', 'AuthController@login')", routes, 8);
808+
ASSERT_EQ(n, 1);
809+
ASSERT_STR_EQ(routes[0].path, "/api/login");
810+
811+
PASS();
812+
}
813+
783814
/* ── Laravel module-level route extraction ─────────────────────── */
784815

785816
TEST(httplink_laravel_module_level_routes) {
@@ -863,6 +894,9 @@ SUITE(httplink) {
863894
RUN_TEST(httplink_read_source_lines);
864895
RUN_TEST(httplink_read_source_lines_missing_file);
865896

897+
/* Laravel path filter (1 test) */
898+
RUN_TEST(httplink_laravel_path_filter);
899+
866900
/* Integration (3 tests) */
867901
RUN_TEST(httplink_linker_route_nodes);
868902
RUN_TEST(httplink_linker_same_service_skip);

0 commit comments

Comments
 (0)