|
16 | 16 | #include "foundation/compat.h" |
17 | 17 | #include "cbm.h" |
18 | 18 |
|
| 19 | +#include "foundation/compat_regex.h" |
| 20 | + |
19 | 21 | #include <stdbool.h> |
20 | 22 | #include <stdio.h> |
21 | 23 | #include <stdlib.h> |
@@ -289,5 +291,312 @@ int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *file |
289 | 291 | cbm_log_info("pass.done", "pass", "calls", "total", itoa_log(total_calls), "resolved", |
290 | 292 | itoa_log(resolved), "unresolved", itoa_log(unresolved), "errors", |
291 | 293 | itoa_log(errors)); |
| 294 | + |
| 295 | + /* Additional pattern-based edge passes run after normal call resolution */ |
| 296 | + cbm_pipeline_pass_fastapi_depends(ctx, files, file_count); |
| 297 | + cbm_pipeline_pass_dll_resolve(ctx, files, file_count); |
| 298 | + |
292 | 299 | return 0; |
293 | 300 | } |
| 301 | + |
| 302 | +/* ── FastAPI Depends() tracking ──────────────────────────────────── */ |
| 303 | +/* Scans Python function signatures for Depends(func_ref) patterns and |
| 304 | + * creates CALLS edges from the endpoint to the dependency function. |
| 305 | + * Without this, FastAPI auth/DI functions appear as dead code (in_degree=0). */ |
| 306 | + |
| 307 | +// NOLINTNEXTLINE(misc-include-cleaner) — cbm_file_info_t provided by standard header |
| 308 | +void cbm_pipeline_pass_fastapi_depends(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, |
| 309 | + int file_count) { |
| 310 | + cbm_regex_t depends_re; |
| 311 | + if (cbm_regcomp(&depends_re, "Depends\\(([A-Za-z_][A-Za-z0-9_.]*)", CBM_REG_EXTENDED) != 0) { |
| 312 | + return; |
| 313 | + } |
| 314 | + |
| 315 | + int edge_count = 0; |
| 316 | + for (int i = 0; i < file_count; i++) { |
| 317 | + if (files[i].language != CBM_LANG_PYTHON) { |
| 318 | + continue; |
| 319 | + } |
| 320 | + if (cbm_pipeline_check_cancel(ctx)) { |
| 321 | + break; |
| 322 | + } |
| 323 | + |
| 324 | + /* Check if file has Depends call in cached extraction */ |
| 325 | + CBMFileResult *result = ctx->result_cache ? ctx->result_cache[i] : NULL; |
| 326 | + if (!result) { |
| 327 | + continue; |
| 328 | + } |
| 329 | + bool has_depends = false; |
| 330 | + for (int c = 0; c < result->calls.count; c++) { |
| 331 | + if (result->calls.items[c].callee_name && |
| 332 | + strcmp(result->calls.items[c].callee_name, "Depends") == 0) { |
| 333 | + has_depends = true; |
| 334 | + break; |
| 335 | + } |
| 336 | + } |
| 337 | + if (!has_depends) { |
| 338 | + continue; |
| 339 | + } |
| 340 | + |
| 341 | + /* Read source and scan for Depends(func_ref) in function signatures */ |
| 342 | + int source_len = 0; |
| 343 | + char *source = read_file(files[i].path, &source_len); |
| 344 | + if (!source) { |
| 345 | + continue; |
| 346 | + } |
| 347 | + |
| 348 | + char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, files[i].rel_path); |
| 349 | + |
| 350 | + /* Build import map for alias resolution */ |
| 351 | + const char **imp_keys = NULL; |
| 352 | + const char **imp_vals = NULL; |
| 353 | + int imp_count = 0; |
| 354 | + build_import_map(ctx, files[i].rel_path, result, &imp_keys, &imp_vals, &imp_count); |
| 355 | + |
| 356 | + for (int d = 0; d < result->defs.count; d++) { |
| 357 | + CBMDefinition *def = &result->defs.items[d]; |
| 358 | + if (!def->qualified_name || def->start_line == 0) { |
| 359 | + continue; |
| 360 | + } |
| 361 | + if (strcmp(def->label, "Function") != 0 && strcmp(def->label, "Method") != 0) { |
| 362 | + continue; |
| 363 | + } |
| 364 | + |
| 365 | + /* Extract function signature (def line through ~15 lines for multi-line sigs) */ |
| 366 | + int sig_end_line = (int)def->start_line + 15; |
| 367 | + if (def->end_line > 0 && sig_end_line > (int)def->end_line) { |
| 368 | + sig_end_line = (int)def->end_line; |
| 369 | + } |
| 370 | + |
| 371 | + /* Find signature region in source */ |
| 372 | + const char *p = source; |
| 373 | + int line = 1; |
| 374 | + while (*p && line < def->start_line) { |
| 375 | + if (*p == '\n') { |
| 376 | + line++; |
| 377 | + } |
| 378 | + p++; |
| 379 | + } |
| 380 | + const char *sig_start = p; |
| 381 | + while (*p && line < sig_end_line) { |
| 382 | + if (*p == '\n') { |
| 383 | + line++; |
| 384 | + } |
| 385 | + p++; |
| 386 | + /* Stop at closing paren + colon (end of Python signature) */ |
| 387 | + if (p > sig_start + 1 && p[-1] == ':' && p[-2] == ')') { |
| 388 | + break; |
| 389 | + } |
| 390 | + } |
| 391 | + size_t sig_len = (size_t)(p - sig_start); |
| 392 | + char *sig = malloc(sig_len + 1); |
| 393 | + if (!sig) { |
| 394 | + continue; |
| 395 | + } |
| 396 | + memcpy(sig, sig_start, sig_len); |
| 397 | + sig[sig_len] = '\0'; |
| 398 | + |
| 399 | + /* Match Depends(func_ref) patterns */ |
| 400 | + cbm_regmatch_t match[2]; |
| 401 | + const char *scan = sig; |
| 402 | + while (cbm_regexec(&depends_re, scan, 2, match, 0) == 0) { |
| 403 | + int ref_len = match[1].rm_eo - match[1].rm_so; |
| 404 | + char func_ref[256]; |
| 405 | + if (ref_len >= (int)sizeof(func_ref)) { |
| 406 | + ref_len = (int)sizeof(func_ref) - 1; |
| 407 | + } |
| 408 | + memcpy(func_ref, scan + match[1].rm_so, (size_t)ref_len); |
| 409 | + func_ref[ref_len] = '\0'; |
| 410 | + |
| 411 | + /* Resolve through registry */ |
| 412 | + cbm_resolution_t res = cbm_registry_resolve(ctx->registry, func_ref, module_qn, |
| 413 | + imp_keys, imp_vals, imp_count); |
| 414 | + if (res.qualified_name && res.qualified_name[0] != '\0') { |
| 415 | + const cbm_gbuf_node_t *src_node = |
| 416 | + cbm_gbuf_find_by_qn(ctx->gbuf, def->qualified_name); |
| 417 | + const cbm_gbuf_node_t *tgt_node = |
| 418 | + cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); |
| 419 | + if (src_node && tgt_node && src_node->id != tgt_node->id) { |
| 420 | + cbm_gbuf_insert_edge(ctx->gbuf, src_node->id, tgt_node->id, "CALLS", |
| 421 | + "{\"confidence\":0.95,\"strategy\":\"fastapi_depends\"" |
| 422 | + "}"); |
| 423 | + edge_count++; |
| 424 | + } |
| 425 | + } |
| 426 | + scan += match[0].rm_eo; |
| 427 | + } |
| 428 | + free(sig); |
| 429 | + } |
| 430 | + |
| 431 | + free(module_qn); |
| 432 | + free_import_map(imp_keys, imp_vals, imp_count); |
| 433 | + free(source); |
| 434 | + } |
| 435 | + |
| 436 | + cbm_regfree(&depends_re); |
| 437 | + if (edge_count > 0) { |
| 438 | + cbm_log_info("pass.fastapi_depends", "edges", itoa_log(edge_count)); |
| 439 | + } |
| 440 | +} |
| 441 | + |
| 442 | +/* ── DLL resolve tracking ────────────────────────────────────────── */ |
| 443 | +/* Scans C/C++ function source for dynamic DLL resolution patterns |
| 444 | + * (GetProcAddress, dlsym, Resolve) and creates CALLS edges to synthetic |
| 445 | + * stub nodes, enabling call graph tracking across DLL boundaries. */ |
| 446 | + |
| 447 | +// NOLINTNEXTLINE(misc-include-cleaner) — cbm_file_info_t provided by standard header |
| 448 | +void cbm_pipeline_pass_dll_resolve(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, |
| 449 | + int file_count) { |
| 450 | + cbm_regex_t getproc_re; |
| 451 | + cbm_regex_t dlsym_re; |
| 452 | + cbm_regex_t resolve_re; |
| 453 | + |
| 454 | + if (cbm_regcomp(&getproc_re, |
| 455 | + "GetProcAddress[AW]?\\([^,]+,[[:space:]]*\"([A-Za-z_][A-Za-z0-9_]*)\"", |
| 456 | + CBM_REG_EXTENDED) != 0) { |
| 457 | + return; |
| 458 | + } |
| 459 | + if (cbm_regcomp(&dlsym_re, "dlsym\\([^,]+,[[:space:]]*\"([A-Za-z_][A-Za-z0-9_]*)\"", |
| 460 | + CBM_REG_EXTENDED) != 0) { |
| 461 | + cbm_regfree(&getproc_re); |
| 462 | + return; |
| 463 | + } |
| 464 | + if (cbm_regcomp(&resolve_re, "[.>]Resolve\\([[:space:]]*\"([A-Za-z_][A-Za-z0-9_]*)\"", |
| 465 | + CBM_REG_EXTENDED) != 0) { |
| 466 | + cbm_regfree(&getproc_re); |
| 467 | + cbm_regfree(&dlsym_re); |
| 468 | + return; |
| 469 | + } |
| 470 | + |
| 471 | + cbm_regex_t *patterns[] = {&getproc_re, &dlsym_re, &resolve_re}; |
| 472 | + |
| 473 | + int edge_count = 0; |
| 474 | + for (int i = 0; i < file_count; i++) { |
| 475 | + if (files[i].language != CBM_LANG_C && files[i].language != CBM_LANG_CPP) { |
| 476 | + continue; |
| 477 | + } |
| 478 | + if (cbm_pipeline_check_cancel(ctx)) { |
| 479 | + break; |
| 480 | + } |
| 481 | + |
| 482 | + CBMFileResult *result = ctx->result_cache ? ctx->result_cache[i] : NULL; |
| 483 | + if (!result) { |
| 484 | + continue; |
| 485 | + } |
| 486 | + |
| 487 | + /* Early bail: check if any call targets a DLL resolution function */ |
| 488 | + bool has_dll_call = false; |
| 489 | + for (int c = 0; c < result->calls.count; c++) { |
| 490 | + const char *name = result->calls.items[c].callee_name; |
| 491 | + if (!name) { |
| 492 | + continue; |
| 493 | + } |
| 494 | + if (strcmp(name, "GetProcAddress") == 0 || strcmp(name, "GetProcAddressA") == 0 || |
| 495 | + strcmp(name, "GetProcAddressW") == 0 || strcmp(name, "dlsym") == 0 || |
| 496 | + strstr(name, "Resolve") != NULL) { |
| 497 | + has_dll_call = true; |
| 498 | + break; |
| 499 | + } |
| 500 | + } |
| 501 | + if (!has_dll_call) { |
| 502 | + continue; |
| 503 | + } |
| 504 | + |
| 505 | + int source_len = 0; |
| 506 | + char *source = read_file(files[i].path, &source_len); |
| 507 | + if (!source) { |
| 508 | + continue; |
| 509 | + } |
| 510 | + |
| 511 | + char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, files[i].rel_path); |
| 512 | + |
| 513 | + for (int d = 0; d < result->defs.count; d++) { |
| 514 | + CBMDefinition *def = &result->defs.items[d]; |
| 515 | + if (!def->qualified_name || def->start_line == 0 || def->end_line == 0) { |
| 516 | + continue; |
| 517 | + } |
| 518 | + if (strcmp(def->label, "Function") != 0 && strcmp(def->label, "Method") != 0) { |
| 519 | + continue; |
| 520 | + } |
| 521 | + |
| 522 | + /* Extract function body from source */ |
| 523 | + const char *p = source; |
| 524 | + int line = 1; |
| 525 | + while (*p && line < def->start_line) { |
| 526 | + if (*p == '\n') { |
| 527 | + line++; |
| 528 | + } |
| 529 | + p++; |
| 530 | + } |
| 531 | + const char *body_start = p; |
| 532 | + while (*p && line < def->end_line) { |
| 533 | + if (*p == '\n') { |
| 534 | + line++; |
| 535 | + } |
| 536 | + p++; |
| 537 | + } |
| 538 | + size_t body_len = (size_t)(p - body_start); |
| 539 | + char *body = malloc(body_len + 1); |
| 540 | + if (!body) { |
| 541 | + continue; |
| 542 | + } |
| 543 | + memcpy(body, body_start, body_len); |
| 544 | + body[body_len] = '\0'; |
| 545 | + |
| 546 | + /* Match each DLL resolution pattern */ |
| 547 | + for (int pi = 0; pi < 3; pi++) { |
| 548 | + cbm_regmatch_t match[2]; |
| 549 | + const char *scan = body; |
| 550 | + while (cbm_regexec(patterns[pi], scan, 2, match, 0) == 0) { |
| 551 | + int fn_len = match[1].rm_eo - match[1].rm_so; |
| 552 | + char func_name[256]; |
| 553 | + if (fn_len >= (int)sizeof(func_name)) { |
| 554 | + fn_len = (int)sizeof(func_name) - 1; |
| 555 | + } |
| 556 | + memcpy(func_name, scan + match[1].rm_so, (size_t)fn_len); |
| 557 | + func_name[fn_len] = '\0'; |
| 558 | + |
| 559 | + /* Create edge to synthetic DLL stub node */ |
| 560 | + char target_qn[512]; |
| 561 | + snprintf(target_qn, sizeof(target_qn), "%s.dll.external.%s", module_qn, |
| 562 | + func_name); |
| 563 | + |
| 564 | + const cbm_gbuf_node_t *src_node = |
| 565 | + cbm_gbuf_find_by_qn(ctx->gbuf, def->qualified_name); |
| 566 | + if (src_node) { |
| 567 | + /* Create stub node if it doesn't exist */ |
| 568 | + if (!cbm_gbuf_find_by_qn(ctx->gbuf, target_qn)) { |
| 569 | + char stub_props[256]; |
| 570 | + snprintf(stub_props, sizeof(stub_props), |
| 571 | + "{\"stub\":true,\"source\":\"dll_resolve\"," |
| 572 | + "\"dll_function\":\"%s\"}", |
| 573 | + func_name); |
| 574 | + cbm_gbuf_upsert_node(ctx->gbuf, "Function", func_name, target_qn, |
| 575 | + files[i].rel_path, (int)def->start_line, |
| 576 | + (int)def->start_line, stub_props); |
| 577 | + } |
| 578 | + const cbm_gbuf_node_t *tgt_node = cbm_gbuf_find_by_qn(ctx->gbuf, target_qn); |
| 579 | + if (tgt_node) { |
| 580 | + cbm_gbuf_insert_edge(ctx->gbuf, src_node->id, tgt_node->id, "CALLS", |
| 581 | + "{\"confidence\":0.85,\"strategy\":\"dll_resolve\"" |
| 582 | + "}"); |
| 583 | + edge_count++; |
| 584 | + } |
| 585 | + } |
| 586 | + scan += match[0].rm_eo; |
| 587 | + } |
| 588 | + } |
| 589 | + free(body); |
| 590 | + } |
| 591 | + |
| 592 | + free(module_qn); |
| 593 | + free(source); |
| 594 | + } |
| 595 | + |
| 596 | + cbm_regfree(&getproc_re); |
| 597 | + cbm_regfree(&dlsym_re); |
| 598 | + cbm_regfree(&resolve_re); |
| 599 | + if (edge_count > 0) { |
| 600 | + cbm_log_info("pass.dll_resolve", "edges", itoa_log(edge_count)); |
| 601 | + } |
| 602 | +} |
0 commit comments