diff --git a/install.ps1 b/install.ps1 index b168962cd..aebfff54a 100644 --- a/install.ps1 +++ b/install.ps1 @@ -333,9 +333,26 @@ function Main { } Write-Host "Testing binary..." - # Test by calling --version on the downloaded binary - $testOutput = & $binaryPath --version 2>&1 - if (-not $testOutput -or $LASTEXITCODE -ne 0) { + # Test by calling --version on the downloaded binary. + # We route stderr through a temp file rather than using `2>&1`, so + # that harmless runtime warnings (e.g. requests' RequestsDependency- + # Warning) are not surfaced by PowerShell as NativeCommandError + # records and misinterpreted as failures. We still surface them to + # the user via Write-Host, and rely on $LASTEXITCODE to decide + # whether the binary actually ran. + $stderrFile = New-TemporaryFile + try { + $testOutput = & $binaryPath --version 2>$stderrFile + $testExit = $LASTEXITCODE + $testStderr = (Get-Content -Raw -ErrorAction SilentlyContinue $stderrFile) + } + finally { + Remove-Item -Force -ErrorAction SilentlyContinue $stderrFile + } + if ($testStderr) { + Write-Host $testStderr + } + if ($testExit -ne 0 -or -not $testOutput) { throw "Failed to execute installed binary: $binaryPath" } diff --git a/languages/elixir/ast/AST_elixir.ml b/languages/elixir/ast/AST_elixir.ml index 951e421ad..44db3173b 100644 --- a/languages/elixir/ast/AST_elixir.ml +++ b/languages/elixir/ast/AST_elixir.ml @@ -209,6 +209,10 @@ and expr = | DotAnon of expr * tok (* only inside Call *) | DotRemote of remote_dot + (* Elixir map/struct field access: `foo.bar` with no parens, no args, + * no do-block. Distinct from `foo.bar(...)` which is a remote call + * (encoded as `Call (DotRemote _, _, _)`). 
*) + | FieldAccess of remote_dot | ModuleVarAccess of tok (* @ *) * expr | ArrayAccess of expr * expr bracket (* a Call can be a thousand things, including function and module definitions diff --git a/languages/elixir/generic/Elixir_to_generic.ml b/languages/elixir/generic/Elixir_to_generic.ml index 9be473fe1..a8102d64f 100644 --- a/languages/elixir/generic/Elixir_to_generic.ml +++ b/languages/elixir/generic/Elixir_to_generic.ml @@ -118,6 +118,40 @@ let expr_of_expr_or_kwds (x : (G.expr, keywords_generic) Either_.t) : G.expr = | Left e -> e | Right kwds -> list_container_of_kwds kwds +(* This is a modified version of Ast_generic_helpers.expr_to_pattern *) +let rec expr_to_pattern (e : G.expr) : G.pattern = + match e.e with + | G.N (G.Id (id, info)) -> G.PatId (id, info) + | G.Container (G.Tuple, (t1, xs, t2)) -> + G.PatTuple (t1, List_.map expr_to_pattern xs, t2) + | G.L l -> G.PatLiteral l + | G.Container ((List | Dict), (t1, xs, t2)) -> + G.PatList (t1, List_.map expr_to_pattern xs, t2) + | G.Constructor (n, (_, args, _)) -> + G.PatConstructor (n, List_.map expr_to_pattern args) + | G.Ellipsis t -> G.PatEllipsis t + | G.OtherExpr (tag, [ G.E e ]) -> G.OtherPat (tag, [ G.P (expr_to_pattern e) ]) + | G.Cast (ty, _tok, expr) -> G.PatTyped (expr_to_pattern expr, ty) + | G.LetPattern (p, {e = G.N (G.Id (i, info)); _} ) -> G.PatAs (p, (i, info)) + | G.Call (f, args) -> + begin match f.e, Tok.unbracket args with + | G.N (G.Id (("<>", _), _) as n), + [ G.Arg ({ e = G.L (G.String _); _ } as l); + G.Arg ({ e = G.N _; _ } as r) ] -> + G.PatConstructor (n, [ expr_to_pattern l; expr_to_pattern r ]) + | G.N (G.Id (("^", _), _)), + [ G.Arg ({ e = G.N _; _ } as rhs) ] -> + let tmp = "__tmp", Tok.unsafe_fake_tok "__tmp" in + let tmp_info = G.empty_id_info ~hidden:true () in + let lhs = G.N (G.Id (tmp, tmp_info)) |> G.e in + let op = G.IdSpecial (G.Op G.Eq, Tok.unsafe_fake_tok "==") |> G.e in + let cmp = G.Call (op, Tok.unsafe_fake_bracket [ G.Arg lhs; G.Arg rhs ]) |> G.e in + 
G.PatWhen (G.PatId (tmp, tmp_info), cmp) + | _ -> OtherPat (("ExprToPattern", Tok.unsafe_fake_tok ""), [ G.E e ]) + end + (* TODO: PatKeyVal and more *) + | _ -> OtherPat (("ExprToPattern", Tok.unsafe_fake_tok ""), [ G.E e ]) + (* TODO: lots of work here to detect when args is really a single * pattern, or tuples *) let pat_of_args_and_when (args, when_opt) : G.pattern = @@ -129,8 +163,8 @@ let pat_of_args_and_when (args, when_opt) : G.pattern = let pats = List_.map (function - | G.OtherArg (("ArgKwdQuoted", _), [ G.E e ]) -> H.expr_to_pattern e - | arg -> H.argument_to_expr arg |> H.expr_to_pattern) + | G.OtherArg (("ArgKwdQuoted", _), [ G.E e ]) -> expr_to_pattern e + | arg -> H.argument_to_expr arg |> expr_to_pattern) args in let pat = @@ -432,7 +466,7 @@ and map_stmt env (v : stmt) : G.stmt = let comp_clauses = List_.map (fun (clause : for_clause) -> match clause with | ForGenerator (pat, tarrow, collection) -> - let pat = map_expr env pat |> H.expr_to_pattern in + let pat = map_expr env pat |> expr_to_pattern in let collection = map_expr env collection in G.CompFor (tfor, pat, tarrow, collection) | ForFilter e -> @@ -494,12 +528,12 @@ and map_param_to_gparam env (p : parameter) : G.parameter = G.Param (G.param_of_id ?pdefault id)) | OtherParamExpr e -> let e = map_expr env e in - G.ParamPattern (H.expr_to_pattern e) + G.ParamPattern (expr_to_pattern e) | OtherParamPair (kwd, e) -> let kwd = map_keyword env kwd in let e = map_expr env e in let e = keyval_of_pair (Left (kwd, e)) in - G.ParamPattern (H.expr_to_pattern e) + G.ParamPattern (expr_to_pattern e) (* Convert one rescue/catch stab clause to a G.catch arm. * Each stab has a list of exception-type expressions and a handler body. 
*) @@ -510,9 +544,9 @@ and map_rescue_stab_to_catch env tok (stab : stab_clause) : G.catch = | [] -> G.PatEllipsis tok | [arg] -> let e = map_expr env arg in - H.expr_to_pattern e + expr_to_pattern e | args -> - let pats = List_.map (fun a -> H.expr_to_pattern (map_expr env a)) args in + let pats = List_.map (fun a -> expr_to_pattern (map_expr env a)) args in let pat = List.fold_right (fun p acc -> G.DisjPat (p, acc)) (List.tl pats) (List.hd pats) @@ -708,10 +742,10 @@ and map_vardef env v1 v2 = (* TODO: Elixir also has these patterns: * ^x = 0 meaning x cannot be re-assigned later, and * [x|y] = [0, 1, 2] where x maps to 0, and y maps to the rest - * and H.expr_to_pattern doesn't cover these cases. + * and expr_to_pattern doesn't cover these cases. *) and map_letpattern env v1 v2 = - let e1 = H.expr_to_pattern (map_expr env v1) in + let e1 = expr_to_pattern (map_expr env v1) in let e2 = map_expr env v2 in G.LetPattern (e1, e2) |> G.e @@ -844,6 +878,10 @@ and map_expr env v : G.expr = G.OtherExpr (("DotAnon", tdot), [ G.E e ]) |> G.e (* only inside a Call *) | DotRemote v -> map_remote_dot env v + (* Elixir field access: `foo.bar` (no parens). Translate to a plain + * DotAccess so downstream analysis treats it as field access, not a + * zero-arity function call. 
*) + | FieldAccess v -> map_remote_dot env v | ModuleVarAccess (tat, v2) -> let e = map_expr env v2 in G.OtherExpr (("AttrExpr", tat), [ G.E e ]) |> G.e diff --git a/languages/elixir/tree-sitter/Parse_elixir_tree_sitter.ml b/languages/elixir/tree-sitter/Parse_elixir_tree_sitter.ml index 80ae8926f..beba22f72 100644 --- a/languages/elixir/tree-sitter/Parse_elixir_tree_sitter.ml +++ b/languages/elixir/tree-sitter/Parse_elixir_tree_sitter.ml @@ -546,10 +546,10 @@ and map_body (env : env) ((v1, v2, v3, v4) : CST.body) : body = let _v4 = map_terminator_opt env v4 in v2 :: v3 -and map_call (env : env) (x : CST.call) : call = +and map_call (env : env) (x : CST.call) : expr = match x with | `Call_with_parens_b98484c x -> map_call_without_parentheses env x - | `Call_with_parens_403315d x -> map_call_with_parentheses env x + | `Call_with_parens_403315d x -> Call (map_call_with_parentheses env x) and map_call_arguments_with_parentheses (env : env) ((v1, v2, v3) : CST.call_arguments_with_parentheses) : arguments bracket = @@ -627,26 +627,32 @@ and map_call_with_parentheses (env : env) (x : CST.call_with_parentheses) : call mk_call_parens (Call call1) args blopt and map_call_without_parentheses (env : env) (x : CST.call_without_parentheses) - : call = + : expr = match x with | `Local_call_with_parens (v1, v2, v3) -> let id = map_identifier env v1 in let args = map_call_arguments_without_parentheses env v2 in let blopt = map_anon_opt_opt_nl_before_do_do_blk_3eff85f env v3 in - mk_call_no_parens (Left id) args blopt + Call (mk_call_no_parens (Left id) args blopt) | `Local_call_just_do_blk (v1, v2) -> let id = map_identifier env v1 in let bl = map_do_block env v2 in - mk_call_no_parens (Left id) ([], []) (Some bl) + Call (mk_call_no_parens (Left id) ([], []) (Some bl)) | `Remote_call_with_parens (v1, v2, v3) -> let rdot = map_remote_dot env v1 in - let args : arguments = - match v2 with - | Some x -> map_call_arguments_without_parentheses env x - | None -> ([], []) - in let blopt = 
map_anon_opt_opt_nl_before_do_do_blk_3eff85f env v3 in - mk_call_no_parens (Right rdot) args blopt + (match v2, blopt with + | None, None -> + (* Elixir map/struct field access: `foo.bar` with no parens, + * no args, no do-block. Not a function call. *) + FieldAccess rdot + | _ -> + let args : arguments = + match v2 with + | Some x -> map_call_arguments_without_parentheses env x + | None -> ([], []) + in + Call (mk_call_no_parens (Right rdot) args blopt)) and map_capture_expression (env : env) (x : CST.capture_expression) = match x with @@ -668,7 +674,7 @@ and map_capture_expression (env : env) (x : CST.capture_expression) = | other -> other in (match actual_fun_name with - | I _ | Alias _ | DotAlias _ | DotRemote _ -> + | I _ | Alias _ | DotAlias _ | DotRemote _ | FieldAccess _ -> (* Convert &fun/arity to &(fun(&1, &2, ...)) *) let arity_int = Int64.to_int arity in (* Create PlaceHolder arguments: &1, &2, ... *) @@ -884,9 +890,7 @@ and map_expression (env : env) (x : CST.expression) : expr = | `Un_op x -> map_unary_operator env x | `Bin_op x -> map_binary_operator env x | `Dot x -> map_dot env x - | `Call x -> - let c = map_call env x in - Call c + | `Call x -> map_call env x (* semantic: transformed in Access.get/2 *) | `Access_call (v1, v2, v3, v4) -> let v1 = map_expression env v1 in diff --git a/languages/lisp/tree-sitter/Parse_clojure_tree_sitter.ml b/languages/lisp/tree-sitter/Parse_clojure_tree_sitter.ml index 908d874c6..9f8bc7bf7 100644 --- a/languages/lisp/tree-sitter/Parse_clojure_tree_sitter.ml +++ b/languages/lisp/tree-sitter/Parse_clojure_tree_sitter.ml @@ -1111,17 +1111,24 @@ and map_binding_form_map_lit (env : env) ((_meta, (lb, srcs, rb)) : CST.map_lit) in with_or_as s (token env tk) pats rest - (* Standard map binding, eg, {x :a, [y z] :b}. 
*) - | _bind_form :: `Kwd_lit _ :: _ -> - let rec keyval_and_rest acc = function - | bind_form :: `Kwd_lit kwd_lit :: rest_forms -> - let key = map_binding_form env bind_form in - (* TODO: PatRecord of (dotted_ident * pattern) list bracket *) + (* Standard map binding, eg, {x :a, [y z] :b, z "str-key"}. *) + | _bind_form :: (`Kwd_lit _ | `Str_lit _) :: _ -> + let map_value_key_pattern = function + | `Kwd_lit kwd_lit -> let atom_kind, tok_colon, atom_name = map_kwd_expr_aux env kwd_lit in - let value = G.OtherPat ((atom_kind, tok_colon), [G.Name atom_name]) in - (* let value = G.PatLiteral (map_kwd_lit env kwd_lit) in *) + G.OtherPat ((atom_kind, tok_colon), [G.Name atom_name]) + | `Str_lit str_tok -> + let s, t = H.str env str_tok in + let s_no_quotes = String.sub s 1 (String.length s - 2) in + G.PatLiteral (G.String (Tok.unsafe_fake_bracket (s_no_quotes, t))) + in + let rec keyval_and_rest acc = function + | bind_form :: (`Kwd_lit _ | `Str_lit _ as kv) :: rest_forms -> + let key = map_binding_form env bind_form in + (* TODO: PatRecord of (dotted_ident * pattern) list bracket *) + let value = map_value_key_pattern kv in keyval_and_rest (G.PatKeyVal (key, value) :: acc) rest_forms diff --git a/libs/ast_generic/AST_generic_helpers.ml b/libs/ast_generic/AST_generic_helpers.ml index ea1ac9ca0..d24f8b09c 100644 --- a/libs/ast_generic/AST_generic_helpers.ml +++ b/libs/ast_generic/AST_generic_helpers.ml @@ -199,9 +199,7 @@ let rec expr_to_pattern e = | Container (Tuple, (t1, xs, t2)) -> PatTuple (t1, xs |> List_.map expr_to_pattern, t2) | L l -> PatLiteral l - | Container (List, (t1, xs, t2)) -> - PatList (t1, xs |> List_.map expr_to_pattern, t2) - | Container (Dict, (t1, xs, t2)) -> + | Container ((List | Dict), (t1, xs, t2)) -> PatList (t1, xs |> List_.map expr_to_pattern, t2) | Constructor (n, (_, args, _)) -> PatConstructor (n, args |> List_.map expr_to_pattern) diff --git a/scripts/build-nuitka.sh b/scripts/build-nuitka.sh index 2d4a35260..e04f76cde 100755 --- 
a/scripts/build-nuitka.sh +++ b/scripts/build-nuitka.sh @@ -60,6 +60,7 @@ pushd cli --include-data-dir="$SRC_SEMGREP_DIR/templates=semgrep/templates" \ --include-data-file="$SRC_SEMGREP_DIR/semgrep_interfaces/lang.json=semgrep/semgrep_interfaces/lang.json" \ --include-data-file="$SRC_SEMGREP_DIR/semgrep_interfaces/rule_schema_v1.yaml=semgrep/semgrep_interfaces/rule_schema_v1.yaml" \ + --include-package=charset_normalizer \ --no-deployment-flag=self-execution \ --windows-icon-from-ico=spec/opengrep.ico \ --linux-icon=spec/opengrep.ico \ diff --git a/src/analyzing/AST_to_IL.ml b/src/analyzing/AST_to_IL.ml index 31762f98e..2de6d511a 100644 --- a/src/analyzing/AST_to_IL.ml +++ b/src/analyzing/AST_to_IL.ml @@ -45,9 +45,6 @@ let log_error ?tok msg : unit = Log.err (fun m -> m "%s" (locate ?tok msg)) (*****************************************************************************) (* Types *) (*****************************************************************************) -module IdentSet = Set.Make (String) - -type ctx = { entity_names : IdentSet.t } type stmts = stmt list type rec_point_lvals = @@ -63,18 +60,14 @@ type env = { break_labels : label list; cont_label : label option; rec_point_label : label option; - ctx : ctx; rec_point_lvals : rec_point_lvals option; inside_function : bool; } -let empty_ctx : ctx = { entity_names = IdentSet.empty } - let empty_env (lang : Lang.t) : env = { break_labels = []; cont_label = None; rec_point_label = None; - ctx = empty_ctx; rec_point_lvals = None; inside_function = false; lang } @@ -257,8 +250,6 @@ let mk_class_constructor_name (ty : G.type_) cons_id_info : G.name option = Some (G.Id (id, cons_id_info)) | __else__ -> None -let add_entity_name ctx ident : ctx = - { entity_names = IdentSet.add (H.str_of_ident ident) ctx.entity_names } let def_expr_evaluates_to_value (lang : Lang.t) : bool = match lang with @@ -411,6 +402,11 @@ and pattern env pat : stmts * lval * stmts = [G.Name _atom_name])) when env.lang =*= Lang.Clojure -> 
pattern env key_pat + (* Clojure string-key destructuring, e.g. `(let [{x "a"} o] x)`. The value + * is a string literal used as the map lookup key; only `key_pat` binds. *) + | G.PatKeyVal (key_pat, G.PatLiteral (G.String _)) + when env.lang =*= Lang.Clojure -> + pattern env key_pat (* Only seems to be used in Ruby, modulo the above case for Clojure. *) | G.PatKeyVal (_key_pat, val_pat) when env.lang =*= Lang.Ruby -> (* My understanding is that the new variables are introduced on the rhs. *) @@ -853,6 +849,17 @@ and expr_aux env ?(void = false) g_expr : stmts * exp = |> G.e) ] in call_generic env ~void tok eorig e (Tok.unsafe_fake_bracket arg_container) + (* Ruby do-block flattening: `f(args) do |x| ... end` is parsed as + Call(Call(f, args), [Lambda]) but the block is semantically an argument + to f, not to its return value. Flatten into Call(f, args @ [Lambda]). *) + | G.Call ({ e = G.Call (callee, inner_args); _ }, + (_, ([ G.Arg { G.e = G.Lambda _; _ } ] as outer_arg), _ )) + when env.lang =*= Lang.Ruby -> + let merged_args = + Tok.unsafe_fake_bracket + (Tok.unbracket inner_args @ outer_arg) + in + expr_aux env ~void (G.Call (callee, merged_args) |> G.e) | G.Call (e, args) -> let tok = G.fake "call" in call_generic env ~void tok eorig e args @@ -869,23 +876,7 @@ and expr_aux env ?(void = false) g_expr : stmts * exp = | G.ArrayAccess (_, _) | G.DeRef (_, _) -> let ss_lv, lval = lval env g_expr in - let exp = mk_e (Fetch lval) eorig in - let ident_function_call_hack ss exp = - (* Taking into account Ruby's ability to allow function calls without - * parameters or parentheses, we are conducting a check to determine - * if a function with the same name as the identifier exists, specifically - * for Ruby. 
*) - match lval with - | { base = Var { ident; id_info; _ }; _ } - when env.lang =*= Lang.Ruby - && Option.is_none !(id_info.id_resolved) - && IdentSet.mem (H.str_of_ident ident) env.ctx.entity_names -> - let tok = G.fake "call" in - let call_ss, call_exp = call_instr tok eorig ~void (fun res -> Call (res, exp, [])) in - (ss @ call_ss, call_exp) - | _ -> (ss, exp) - in - ident_function_call_hack ss_lv exp + (ss_lv, mk_e (Fetch lval) eorig) (* x = ClassName(args ...) in Python *) (* ClassName has been resolved to __init__ by the pro engine. *) (* Identified and treated as x = New ClassName(args ...) to support @@ -983,7 +974,7 @@ and expr_aux env ?(void = false) g_expr : stmts * exp = | G.Comprehension (_op, (_l, (er, clauses), _r)) -> comprehension env er clauses | G.Lambda fdef -> - let lval = fresh_lval (snd fdef.fkind) in + let lval = fresh_lval ~str:"_tmp_lambda" (snd fdef.fkind) in let final_fdef = (* NOTE: Reset control-flow labels so that break/continue/recur from * the enclosing scope don't bleed into the lambda body. *) @@ -1403,7 +1394,7 @@ and record env ((_tok, origfields, _) as record_def) : stmts * exp = (* Some languages such as javascript allow function definitions in object literal syntax. *) | G.FuncDef fdef -> - let lval = fresh_lval (snd fdef.fkind) in + let lval = fresh_lval ~str:"_tmp_lambda" (snd fdef.fkind) in (* See NOTE about resetting control-flow labels for lambdas. 
*) let fdef = function_definition @@ -2610,8 +2601,8 @@ and function_definition env fdef : function_definition = (* Entry points *) (****************************************************************************) -let function_definition lang ?ctx fdef : function_definition = - let env = { (empty_env lang) with ctx = ctx ||| empty_ctx } in +let function_definition lang fdef : function_definition = + let env = empty_env lang in function_definition env fdef let stmt lang st : stmts = diff --git a/src/analyzing/AST_to_IL.mli b/src/analyzing/AST_to_IL.mli index 3477658d2..3e6ce6a22 100644 --- a/src/analyzing/AST_to_IL.mli +++ b/src/analyzing/AST_to_IL.mli @@ -1,11 +1,5 @@ -type ctx - -val empty_ctx : ctx -val add_entity_name : ctx -> AST_generic.ident -> ctx - val function_definition : Lang.t -> - ?ctx:ctx -> AST_generic.function_definition -> IL.function_definition diff --git a/src/analyzing/CFG_build.ml b/src/analyzing/CFG_build.ml index 3d61c368d..0cb6282e9 100644 --- a/src/analyzing/CFG_build.ml +++ b/src/analyzing/CFG_build.ml @@ -456,6 +456,6 @@ and cfg_of_fdef fdef = mark_at_exit_nodes cfg; IL.{ params = fdef.fparams; cfg; lambdas } -let cfg_of_gfdef lang ?ctx fdef = - let fdef_il = AST_to_IL.function_definition lang ?ctx fdef in +let cfg_of_gfdef lang fdef = + let fdef_il = AST_to_IL.function_definition lang fdef in cfg_of_fdef fdef_il diff --git a/src/analyzing/CFG_build.mli b/src/analyzing/CFG_build.mli index 4305da9af..4d56b6361 100644 --- a/src/analyzing/CFG_build.mli +++ b/src/analyzing/CFG_build.mli @@ -9,5 +9,5 @@ val cfg_of_fdef : IL.function_definition -> IL.fun_cfg (** Compute the control flow graph of an IL function definition. *) val cfg_of_gfdef : - Lang.t -> ?ctx:AST_to_IL.ctx -> AST_generic.function_definition -> IL.fun_cfg + Lang.t -> AST_generic.function_definition -> IL.fun_cfg (** Same as 'cfg_of_fdef' but takes a Generic function definition. 
*) diff --git a/src/analyzing/Visit_function_defs.ml b/src/analyzing/Visit_function_defs.ml index f7e3bcf8f..98917f93e 100644 --- a/src/analyzing/Visit_function_defs.ml +++ b/src/analyzing/Visit_function_defs.ml @@ -251,6 +251,7 @@ class ['self] visitor_with_parent_path = self#visit_stmt f body) | G.DefStmt (ent, G.VarDef { vinit = Some { e = G.Lambda fdef; _ }; _ }) -> + (* Handle lambda assignments in class fields *) let class_il = Option.bind !current_class g_name_to_il_name in let func_il = entity_to_il_name ent in let visitor_parent_path, current_fn_id = diff --git a/src/call_graph/Call_graph.ml b/src/call_graph/Call_graph.ml index bb14a9142..624cf0600 100644 --- a/src/call_graph/Call_graph.ml +++ b/src/call_graph/Call_graph.ml @@ -63,11 +63,6 @@ module Dot = Graph.Graphviz.Dot (Display) module Topo = Graph.Topological.Make (G) module SCC = Graph.Components.Make (G) -let node_key (n : node) = - let name = Function_id.show n in - let filename, line, col = Function_id.to_file_line_col n in - Printf.sprintf "%s|%s|%d|%d" name filename line col - (** Helpers **) let pos_of_tok (tok : Tok.t) : Pos.t = diff --git a/src/call_graph/Function_id.ml b/src/call_graph/Function_id.ml index 991fcc636..4ac5a332e 100644 --- a/src/call_graph/Function_id.ml +++ b/src/call_graph/Function_id.ml @@ -19,13 +19,15 @@ type t = IL.ident let normalize_file (file : Fpath.t) : string = Fpath.to_string (Fpath.normalize file) +(* Extract position info whenever the token has it, whether the token is + real or fake. This keeps same-name functions defined in different files + (e.g. class-init or top-level nodes for same-basename files) distinct in + the call graph. Upstream's lambda-only specialization would collapse + non-lambda fake-token nodes across files. 
*) let key ((id, tok) : t) = match Tok.loc_of_tok tok with | Ok loc -> - let file = loc.pos.file in - let line = loc.pos.line in - let col = loc.pos.column in - (id, normalize_file file, line, col) + (id, normalize_file loc.Tok.pos.file, loc.Tok.pos.line, loc.Tok.pos.column) | Error _ -> (id, "", 0, 0) let hash (v : t) = Hashtbl.hash (key v) @@ -64,8 +66,12 @@ let show_debug (id, tok) : string = let of_il_name (n : IL.name) : t = n.IL.ident +(* Unlike [key], we don't gate on is_lambda_name here: this is only used for + display/serialization, not identity, so extracting position from any fake + token that has it is strictly better than returning "unknown". *) let to_file_line_col ((_, tok) : t) : string * int * int = - match Tok.loc_of_tok tok with - | Ok loc -> - (normalize_file loc.pos.file, loc.pos.line, loc.pos.column) - | Error _ -> ("unknown", 0, 0) + if Tok.is_fake tok then + match Tok.loc_of_tok tok with + | Ok loc -> (normalize_file loc.Tok.pos.file, loc.Tok.pos.line, loc.Tok.pos.column) + | _ -> ("unknown", 0, 0) + else (normalize_file (Tok.file_of_tok tok), Tok.line_of_tok tok, Tok.col_of_tok tok) diff --git a/src/engine/Match_tainting_mode.ml b/src/engine/Match_tainting_mode.ml index 11499c142..dbfe40dde 100644 --- a/src/engine/Match_tainting_mode.ml +++ b/src/engine/Match_tainting_mode.ml @@ -70,7 +70,6 @@ type project_target = { ast : G.program; taint_inst : Taint_rule_inst.t; spec_matches : Match_taint_spec.spec_matches; - ctx : AST_to_IL.ctx; object_mappings : (G.name * G.name) list; info_map : fun_info Shape_and_sig.FunctionMap.t; } @@ -536,9 +535,9 @@ let pms_of_effect ~match_on (effect_ : Effect.t) = (* Main entry points *) (*****************************************************************************) -let check_fundef (taint_inst : Taint_rule_inst.t) (name : IL.name) ctx ?glob_env ?class_name +let check_fundef (taint_inst : Taint_rule_inst.t) (name : IL.name) ?glob_env ?class_name ?signature_db ?builtin_signature_db ?call_graph fdef = - let fdef 
= AST_to_IL.function_definition taint_inst.lang ~ctx fdef in + let fdef = AST_to_IL.function_definition taint_inst.lang fdef in let fcfg = CFG_build.cfg_of_fdef fdef in let in_env, env_effects = Taint_input_env.mk_fun_input_env taint_inst ?glob_env fdef.fparams @@ -610,7 +609,7 @@ let prepend_default_assignments defaults (fbody : G.function_body) : let prologue = defaults |> List_.map default_assignment_stmt in G.FBStmt (G.Block (Tok.unsafe_fake_bracket (prologue @ [ body_stmt ])) |> G.s) -let extract_single_arity_signatures ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) +let extract_single_arity_signatures ~(lang : Lang.t) ~(taint_inst : Taint_rule_inst.t) ~(ast : G.program) ?(builtin_signature_db : Shape_and_sig.builtin_signature_database option) ~(call_graph : Call_graph.G.t) (info : fun_info) @@ -652,7 +651,7 @@ let extract_single_arity_signatures ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) } in let fdef_il = - AST_to_IL.function_definition lang ~ctx synthetic_fdef + AST_to_IL.function_definition lang synthetic_fdef in let cfg = CFG_build.cfg_of_fdef fdef_il in extract_signature_for cfg kept_arity acc) @@ -771,18 +770,7 @@ let extract_multi_arity_cases (fdef : G.function_definition) : | _ -> Some sorted) | _ -> None -let build_ast_ctx (ast : G.program) : AST_to_IL.ctx = - let ctx = ref AST_to_IL.empty_ctx in - Visit_function_defs.visit - (fun opt_ent _fdef -> - match opt_ent with - | Some { name = EN (Id (n, _)); _ } -> - ctx := AST_to_IL.add_entity_name !ctx n - | __else__ -> ()) - ast; - !ctx - -let collect_fun_info_map ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) +let collect_fun_info_map ~(lang : Lang.t) (ast : G.program) : fun_info Shape_and_sig.FunctionMap.t = let add_info info info_map = let fn_id = Function_id.of_il_name info.name in @@ -806,7 +794,7 @@ let collect_fun_info_map ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) | _ -> None in let fdef_il = - AST_to_IL.function_definition lang ~ctx fdef + AST_to_IL.function_definition lang fdef in let cfg = CFG_build.cfg_of_fdef 
fdef_il in let info = @@ -850,7 +838,7 @@ let collect_fun_info_map ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) [] in let fdef_il = - AST_to_IL.function_definition lang ~ctx fdef + AST_to_IL.function_definition lang fdef in let cfg = CFG_build.cfg_of_fdef fdef_il in let info = @@ -866,7 +854,7 @@ let collect_fun_info_map ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) add_info info info_map)) Shape_and_sig.FunctionMap.empty ast -let add_signatures_for_fun_info ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) +let add_signatures_for_fun_info ~(lang : Lang.t) ~(taint_inst : Taint_rule_inst.t) ~(ast : G.program) ?(builtin_signature_db : Shape_and_sig.builtin_signature_database option) ~(call_graph : Call_graph.G.t) (info : fun_info) @@ -885,7 +873,7 @@ let add_signatures_for_fun_info ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) } in let fdef_il = - AST_to_IL.function_definition lang ~ctx synthetic_fdef + AST_to_IL.function_definition lang synthetic_fdef in let cfg = CFG_build.cfg_of_fdef fdef_il in let db', _sig = @@ -898,16 +886,16 @@ let add_signatures_for_fun_info ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) db') db arity_cases | None -> - extract_single_arity_signatures ~lang ~ctx ~taint_inst ~ast + extract_single_arity_signatures ~lang ~taint_inst ~ast ?builtin_signature_db ~call_graph info db -let check_function_defs_for_matches ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) +let check_function_defs_for_matches ~(lang : Lang.t) ~(taint_inst : Taint_rule_inst.t) ~(glob_env : Taint_lval_env.t) ?(signature_db : Shape_and_sig.signature_database option) ?(builtin_signature_db : Shape_and_sig.builtin_signature_database option) ?(call_graph : Call_graph.G.t option) ~(record_matches : Shape_and_sig.Effects.t -> unit) (ast : G.program) : unit = - let info_map = collect_fun_info_map ~lang ~ctx ast in + let info_map = collect_fun_info_map ~lang ast in Shape_and_sig.FunctionMap.iter (fun _fn_id info -> if not info.is_lambda_assignment then ( @@ -919,7 +907,7 @@ let check_function_defs_for_matches ~(lang : 
Lang.t) ~(ctx : AST_to_IL.ctx) --------------------" (IL.str_of_name info.name)); let _flow, fdef_effects, _mapping = - check_fundef taint_inst info.name ctx ~glob_env + check_fundef taint_inst info.name ~glob_env ?class_name:info.class_name_str ?signature_db ?builtin_signature_db ?call_graph info.fdef in @@ -948,19 +936,17 @@ let build_interfile_rule_context (xconf : Match_env.xconfig) ~per_file_formula_cache ~require_source_sink:false xconf lang file (ast, []) rule in - let ctx = build_ast_ctx ast in let object_mappings = Taint_signature_extractor.detect_object_initialization ast taint_inst.lang in - let info_map = collect_fun_info_map ~lang ~ctx ast in + let info_map = collect_fun_info_map ~lang ast in Some { xtarget; ast; taint_inst; spec_matches; - ctx; object_mappings; info_map; }) @@ -1033,7 +1019,7 @@ let build_interfile_rule_context (xconf : Match_env.xconfig) match Shape_and_sig.FunctionMap.find_opt node project_info_map with | None -> db | Some { target; info } -> - add_signatures_for_fun_info ~lang ~ctx:target.ctx + add_signatures_for_fun_info ~lang ~taint_inst:target.taint_inst ~ast:target.ast ?builtin_signature_db:(Some builtin_signature_db) ~call_graph:relevant_graph info db) @@ -1119,8 +1105,6 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook with | None -> (None, None) | Some (taint_inst, spec_matches, expls) -> - let ctx = build_ast_ctx ast in - let file_glob_env, glob_effects = Taint_input_env.mk_file_env taint_inst ast in let imported_glob_env = match interfile_rule_context with @@ -1153,7 +1137,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook * and added to the signature database after IL conversion *) (* Collect function metadata and prepare call graph based ordering. 
*) - let info_map = collect_fun_info_map ~lang ~ctx ast in + let info_map = collect_fun_info_map ~lang ast in (* Use object mappings from Object_initialization.ml *) let all_object_mappings = object_mappings in let initial_signature_db = @@ -1238,17 +1222,48 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook let run_check_fundef_if_needed (info : fun_info) (updated_db : Shape_and_sig.signature_database) : Shape_and_sig.signature_database = - if info.is_lambda_assignment then updated_db - else begin - let _flow, fdef_effects, _mapping = - check_fundef taint_inst info.name ctx ~glob_env - ?class_name:info.class_name_str ~signature_db:updated_db - ?builtin_signature_db - ?call_graph:(Some relevant_graph) info.fdef - in - record_matches fdef_effects; - updated_db - end + let _flow, fdef_effects, _mapping = + check_fundef taint_inst info.name ~glob_env + ?class_name:info.class_name_str ~signature_db:updated_db + ?builtin_signature_db + ?call_graph:(Some relevant_graph) info.fdef + in + (* For lambda assignments we only record "unconditional" ToSink + effects — those where the taint at the sink comes from a + concrete pattern-source match (e.g. a parameter declared as a + source via `pattern-inside: function $X(..., $RES, ...) {...}`). + Effects whose taint is purely parameterized (BArg) still ride + through the signature at resolved call sites; effects mixing + both get an Src-only slice surfaced here. 
*) + let keep_src_toSink_only (eff : Effect.t) : Effect.t option = + match eff with + | Effect.ToSink si -> + let items, precond = si.taints_with_precondition in + let src_items = + List.filter + (fun (i : Effect.taint_to_sink_item) -> + match i.taint.orig with + | Taint.Src _ -> true + | _ -> false) + items + in + if List_.null src_items then None + else + Some + (Effect.ToSink + { + si with + taints_with_precondition = (src_items, precond); + }) + | _ -> None + in + let effects_to_record = + if info.is_lambda_assignment then + Effects.filter_map keep_src_toSink_only fdef_effects + else fdef_effects + in + record_matches effects_to_record; + updated_db in let process_fun_info info db = @@ -1267,8 +1282,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook } in let fdef_il = - AST_to_IL.function_definition lang ~ctx - synthetic_fdef + AST_to_IL.function_definition lang synthetic_fdef in let cfg = CFG_build.cfg_of_fdef fdef_il in let db', _sig = @@ -1284,7 +1298,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook run_check_fundef_if_needed info updated_db | None -> let updated_db = - extract_single_arity_signatures ~lang ~ctx ~taint_inst ~ast + extract_single_arity_signatures ~lang ~taint_inst ~ast ?builtin_signature_db ~call_graph:relevant_graph info db in run_check_fundef_if_needed info updated_db @@ -1328,7 +1342,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook missing signatures from the full graph. Top-level direct calls and callback sources can otherwise be skipped because the call graph is oriented callee -> caller. 
*) - add_signatures_for_fun_info ~lang ~ctx ~taint_inst ~ast + add_signatures_for_fun_info ~lang ~taint_inst ~ast ?builtin_signature_db ~call_graph info db) info_map signature_db_after_order in @@ -1362,7 +1376,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook --------------------" (IL.str_of_name name)); let _flow, fdef_effects, _mapping = - check_fundef taint_inst name ctx ~glob_env + check_fundef taint_inst name ~glob_env ?builtin_signature_db fdef in record_matches fdef_effects) @@ -1370,7 +1384,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook (None, None, false) in if needs_function_match_pass then - check_function_defs_for_matches ~lang ~ctx ~taint_inst ~glob_env + check_function_defs_for_matches ~lang ~taint_inst ~glob_env ?signature_db:final_signature_db ?builtin_signature_db ?call_graph:relevant_graph ~record_matches ast; diff --git a/src/engine/Match_tainting_mode.mli b/src/engine/Match_tainting_mode.mli index 341685e72..aa49f702e 100644 --- a/src/engine/Match_tainting_mode.mli +++ b/src/engine/Match_tainting_mode.mli @@ -33,7 +33,6 @@ val build_interfile_contexts : val check_fundef : Taint_rule_inst.t -> IL.name (** entity being analyzed *) -> - AST_to_IL.ctx -> ?glob_env:Taint_lval_env.t -> ?class_name:string -> ?signature_db:Shape_and_sig.signature_database -> diff --git a/src/engine/tests/Test_dataflow_tainting.ml b/src/engine/tests/Test_dataflow_tainting.ml index 240f264aa..bc0586cd7 100644 --- a/src/engine/tests/Test_dataflow_tainting.ml +++ b/src/engine/tests/Test_dataflow_tainting.ml @@ -32,7 +32,7 @@ let test_tainting taint_inst def = let fcfg, _effects_IGNORED, mapping = Match_tainting_mode.check_fundef taint_inst test_name - AST_to_IL.empty_ctx def + def in DataflowX.display_mapping fcfg.cfg mapping Taint_lval_env.to_string diff --git a/src/parsing/Disambiguate_ruby_calls.ml b/src/parsing/Disambiguate_ruby_calls.ml index 8f40031d1..f0c190a2f 100644 --- a/src/parsing/Disambiguate_ruby_calls.ml 
+++ b/src/parsing/Disambiguate_ruby_calls.ml @@ -23,8 +23,16 @@ class ['self] visitor = method! visit_expr_kind env ek = match ek with - (* Do not recurse into the callee of a Call -- only visit arguments. *) + (* Visit the callee of a Call unless it is a bare N(Id(...)) — + visiting that would wrap it in another Call, producing a spurious + Call(Call(f, []), args). For compound callees (DotAccess, + ArrayAccess, etc.) we DO recurse so that nested bare identifiers + like `helper` in `helper.process()` get properly wrapped. *) + | Call ({ e = N (Id _); _ } as callee, args) -> + let args = self#visit_arguments env args in + Call (callee, args) | Call (callee, args) -> + let callee = self#visit_expr env callee in let args = self#visit_arguments env args in Call (callee, args) (* Bare unresolved lowercase identifier -- wrap in a zero-arg Call. *) diff --git a/src/tainting/Dataflow_tainting.ml b/src/tainting/Dataflow_tainting.ml index b7b9e08b6..1d6114701 100644 --- a/src/tainting/Dataflow_tainting.ml +++ b/src/tainting/Dataflow_tainting.ml @@ -961,6 +961,17 @@ let lambdas_to_analyze_in_node env lambdas node = in Option.to_list unused_lambda_def @ lambdas_used_in_node lambdas node +(* Collect ALL lambdas recursively from a fun_cfg, in innermost-first order. + This ensures nested lambda signatures are extracted before their parents. 
*) +let rec collect_all_lambdas_innermost_first (fun_cfg : IL.fun_cfg) + : (IL.name * IL.fun_cfg) list = + IL.NameMap.fold (fun name lcfg results -> + (* First collect nested lambdas from this lambda *) + let nested = collect_all_lambdas_innermost_first lcfg in + (* Then add this lambda after its nested ones *) + results @ nested @ [(name, lcfg)] + ) fun_cfg.lambdas [] + (*****************************************************************************) (* Miscellaneous *) (*****************************************************************************) @@ -1722,7 +1733,35 @@ let check_function_call ?receiver_lval env fun_exp args (Display_IL.string_of_exp fun_exp) arity env.taint_inst.options.taint_intrafile); let sig_result = - if env.taint_inst.options.taint_intrafile then lookup_signature env fun_exp arity + if env.taint_inst.options.taint_intrafile then + let from_db = lookup_signature env fun_exp arity in + match from_db with + | Some _ -> from_db + | None -> + (* lookup_signature failed - check if callee has a Fun shape in lval_env. + * This handles two cases: + * callback(source()) -- direct call, lval = callback + * callback.run(source()) -- invoke method, lval = callback.run + * For invoke methods (e.g. Java Runnable.run), strip the method offset + * and look up the base variable. 
*) + (match fun_exp.e with + | Fetch lval -> + let lval_to_check = + let invoke_methods = (Lang_config.get env.taint_inst.lang).invoke_methods in + match lval.rev_offset with + | [{ o = Dot method_name; _ }] + when List.mem (fst method_name.ident) invoke_methods -> + { lval with rev_offset = [] } + | _ -> lval + in + (match Lval_env.find_lval env.lval_env lval_to_check with + | Some (S.Cell (_, S.Fun fun_sig)) -> + Log.debug (fun m -> + m "SIG_FROM_SHAPE: Found Fun shape for %s" + (Display_IL.string_of_exp fun_exp)); + Some fun_sig + | _ -> None) + | _ -> None) else None in match sig_result with @@ -2346,16 +2385,12 @@ let call_with_intrafile lval_opt e env args instr = * In this case we return empty taints - the callback's return will be handled * when the ToSinkInCall effect is instantiated. *) let is_method_callback_invoke = - (* Check if this is a method call pattern on a callback parameter *) - match env.taint_inst.lang, e_obj, e.e with - | Lang.Java, `Obj (_, S.Arg _), Fetch { rev_offset = { o = Dot name; _ } :: _; _ } -> - (* Java Function.apply or similar callback invocation methods *) - let method_name = fst name.ident in - method_name = "apply" || method_name = "accept" || method_name = "test" || method_name = "get" - | Lang.Ruby, `Obj (_, S.Arg _), Fetch { rev_offset = { o = Dot name; _ } :: _; _ } -> - (* Ruby proc/lambda.call invocation *) - let method_name = fst name.ident in - method_name = "call" + (* Check if this is a method call on a callback parameter + * via a configured invoke method (e.g. .apply, .call, .run). 
*) + match e_obj, e.e with + | `Obj (_, S.Arg _), Fetch { rev_offset = { o = Dot name; _ } :: _; _ } -> + let invoke_methods = (Lang_config.get env.taint_inst.lang).invoke_methods in + List.mem (fst name.ident) invoke_methods | _ -> false in let callee_is_callback = @@ -2574,20 +2609,10 @@ let check_tainted_instr env instr : Taints.t * S.shape * Lval_env.t = (* Check if this is a call to a function parameter (either direct or via method) *) (match e_obj with | `Obj (_obj_taints, S.Arg _fun_arg) -> - (* This is a method call on a function parameter (e.g., callback.apply in Java). - * Treat it as invoking the callback. - * EXCEPTION: Ruby's .call method should NOT be treated this way during signature - * extraction, as it creates infinite recursion. Ruby blocks are handled via - * implicit lambda detection instead. *) - let is_ruby_call_method = - match (e.e, env.taint_inst.lang) with - | Fetch { base = _; rev_offset = [{ o = Dot method_name; _ }] }, lang - when Lang.(lang =*= Ruby) && fst method_name.ident = "call" -> true - | _ -> false - in - if not is_ruby_call_method then - effects_of_call_func_arg e (match e_obj with `Obj (_, shape) -> shape | `Fun -> e_shape) args_taints - |> record_effects { env with lval_env } + (* This is a method call on a function parameter (e.g., callback.apply in Java, + * callback.call in Ruby). Treat it as invoking the callback. *) + effects_of_call_func_arg e (match e_obj with `Obj (_, shape) -> shape | `Fun -> e_shape) args_taints + |> record_effects { env with lval_env } | _ -> effects_of_call_func_arg e e_shape args_taints |> record_effects { env with lval_env }); @@ -3152,13 +3177,17 @@ and (fixpoint : | None -> in_env else in_env in - (* Extract signatures for all lambdas in the function for HOF support *) + (* Extract signatures for all lambdas in the function for HOF support. + We collect ALL lambdas (including nested ones) in innermost-first order, + so nested lambda signatures are available when processing their parents. 
*) let signature_db_with_lambdas = if taint_inst.options.taint_intrafile then match signature_db with | Some db -> - IL.NameMap.fold - (fun lambda_name lambda_cfg acc_db -> + (* Collect all lambdas recursively, innermost first *) + let all_lambdas_list = collect_all_lambdas_innermost_first fun_cfg in + List.fold_left + (fun acc_db (lambda_name, lambda_cfg) -> try Log.debug (fun m -> m "Extracting signature for lambda %s" @@ -3255,7 +3284,7 @@ and (fixpoint : (IL.str_of_name lambda_name) (Printexc.to_string e)); acc_db) - fun_cfg.lambdas db + db all_lambdas_list |> Option.some | None -> signature_db else signature_db diff --git a/src/tainting/Graph_from_AST.ml b/src/tainting/Graph_from_AST.ml index 9389f7dd3..7e07c587d 100644 --- a/src/tainting/Graph_from_AST.ml +++ b/src/tainting/Graph_from_AST.ml @@ -565,12 +565,12 @@ let fn_id_of_entity ~(lang : Lang.t) (opt_ent : G.entity option) Some (adjusted_parent_path @ [Some name]) | None -> None) | None -> - (* Anonymous function - use _tmp with fake token to match AST_to_IL behavior. - AST_to_IL.fresh_var creates fake tokens for _tmp variables. *) + (* Anonymous function - use _tmp_lambda with fake token to match AST_to_IL behavior. + AST_to_IL.fresh_var creates fake tokens for lambda variables. *) let tok = match fdef.fkind with (_, tok) -> tok in - let fake_tok = Tok.fake_tok tok "_tmp" in + let fake_tok = Tok.fake_tok tok "_tmp_lambda" in let tmp_name = IL.{ - ident = ("_tmp", fake_tok); + ident = ("_tmp_lambda", fake_tok); sid = G.SId.unsafe_default; id_info = G.empty_id_info (); } in @@ -1133,7 +1133,26 @@ let extract_calls ~(lang : Lang.t) ?(object_mappings = []) ?(all_funcs = []) ?(c with | Some fn_id -> calls := (fn_id, tok) :: !calls - | None -> ()); + | None -> + (* Invoke-method pattern: var.run() where var is a lambda. + If the method name is a configured invoke method, look for + a lambda with the receiver's name in the current scope. 
*) + let invoke_methods = (Lang_config.get lang).invoke_methods in + (match callee.G.e with + | G.DotAccess ({ e = G.N (G.Id ((var_name, _), _)); _ }, _, + G.FN (G.Id ((method_name, method_tok), _))) + when List.mem method_name invoke_methods -> + let lambda_match = List.find_opt (fun (f : func_info) -> + match List_.init_and_last_opt f.fn_id with + | Some (f_parent, Some name) + when String.equal (fst name.IL.ident) var_name -> + equal_with_pos f_parent caller_parent_path + | _ -> false + ) all_funcs in + (match lambda_match with + | Some f -> calls := (f.fn_id, method_tok) :: !calls + | None -> ()) + | _ -> ())); (* Check arguments for unresolved function calls (Ruby-style) *) List.iter check_arg_for_unresolved_function_call args_list; (* Visit callee expression for nested calls (e.g., Ruby's File.open(path_for(x)) do ... end @@ -1255,18 +1274,21 @@ let extract_callback_from_arg (arg_expr : G.expr) : (IL.name * Tok.t * IL.name o | G.DotAccess (_, _, G.FN (G.Id (id, id_info))) -> let callback_name = AST_to_IL.var_of_id_info id id_info in Some (callback_name, snd id, None) - (* Elixir: &func/n - ShortLambda wrapping a call to the named function. - Structure: OtherExpr("ShortLambda", [Params[&1,...]; S(ExprStmt(Call(func, args)))]) + (* Elixir: &func/n or &Mod.func/n - ShortLambda wrapping a call to the + named (local or remote) function. Structure: + OtherExpr("ShortLambda", [Params[&1,...]; S(ExprStmt(Call(func, args)))]) + where func is either a plain Id or a DotAccess(..., FN(Id)). Create a _tmp node to match what AST_to_IL creates for the anonymous wrapper. 
*) | G.OtherExpr (("ShortLambda", shortlambda_tok), [G.Params _; G.S { G.s = G.ExprStmt (inner_e, _); _ }]) -> (match inner_e.G.e with - | G.Call ({ e = G.N (G.Id (id, id_info)); _ }, _) -> + | G.Call ({ e = G.N (G.Id (id, id_info)) + | G.DotAccess (_, _, G.FN (G.Id (id, id_info))); _ }, _) -> let callback_name = AST_to_IL.var_of_id_info id id_info in - (* Create _tmp IL.name using Tok.fake_tok like AST_to_IL.fresh_var does *) - let tmp_tok = Tok.fake_tok shortlambda_tok "_tmp" in + (* Create _tmp_lambda IL.name using Tok.fake_tok like AST_to_IL.fresh_var does *) + let tmp_tok = Tok.fake_tok shortlambda_tok "_tmp_lambda" in let tmp_name = IL.{ - ident = ("_tmp", tmp_tok); + ident = ("_tmp_lambda", tmp_tok); sid = G.SId.unsafe_default; id_info = G.empty_id_info (); } in @@ -1439,6 +1461,18 @@ let extract_hof_callbacks ?(_object_mappings = []) ?(all_funcs = []) inherit [_] G.iter as super method! visit_expr env e = (match e.G.e with + (* Ruby/Scala block pattern: f(args) { block } is Call(Call(callee, inner_args), [block]). + Merge inner_args and block args so the HOF detection sees all arguments together. 
*) + | G.Call ({ e = G.Call (callee, inner_args); _ }, + (_, ([ G.Arg { G.e = G.Lambda _; _ } ] as outer_arg), _)) + when Lang.(lang =*= Ruby || lang =*= Scala) -> + let merged_args = Tok.unsafe_fake_bracket + (Tok.unbracket inner_args @ outer_arg) in + let found = extract_hof_callbacks_from_call + ~method_hofs ~function_hofs ~all_funcs ~caller_parent_path + callee merged_args + in + callbacks := found @ !callbacks | G.Call (callee, args) -> let found = extract_hof_callbacks_from_call ~method_hofs ~function_hofs ~all_funcs ~imported_entity_index diff --git a/src/tainting/Lang_config.ml b/src/tainting/Lang_config.ml index f7a2bffd3..91f617921 100644 --- a/src/tainting/Lang_config.ml +++ b/src/tainting/Lang_config.ml @@ -52,6 +52,10 @@ type t = { collection_configs : collection_model_kind list; constructor_names : string list; uses_new_keyword : bool; + (* Methods that invoke `self` as a function. E.g. Runnable.run() in Java, + Proc#call in Ruby. When a variable with Fun shape is the receiver of one + of these methods, the call is treated as a direct lambda invocation. 
*) + invoke_methods : string list; } (* ========================================================================== *) @@ -63,6 +67,7 @@ let empty = { collection_configs = []; constructor_names = []; uses_new_keyword = false; + invoke_methods = []; } let python = { @@ -85,6 +90,7 @@ let python = { ]; constructor_names = ["__init__"]; uses_new_keyword = false; + invoke_methods = []; } let ruby = { @@ -113,6 +119,7 @@ let ruby = { ]; constructor_names = ["initialize"]; uses_new_keyword = false; + invoke_methods = ["call"]; } let javascript = { @@ -153,6 +160,7 @@ let javascript = { ]; constructor_names = ["constructor"]; uses_new_keyword = true; + invoke_methods = []; } let typescript = { @@ -190,6 +198,7 @@ let java = { ]; constructor_names = [""]; uses_new_keyword = true; + invoke_methods = ["run"; "call"; "apply"; "accept"; "test"; "get"; "invoke"]; (* "test"/"get" (Predicate.test, Supplier.get) were matched by the hard-coded Java callback-invoke check this list replaces *) } let kotlin = { @@ -222,6 +231,7 @@ let kotlin = { ]; constructor_names = [""; "init"; "constructor"]; uses_new_keyword = false; + invoke_methods = ["invoke"]; } let scala = { @@ -245,6 +255,7 @@ let scala = { ]; constructor_names = [""]; uses_new_keyword = false; + invoke_methods = []; } let csharp = { @@ -270,6 +281,7 @@ let csharp = { ]; constructor_names = [".ctor"]; uses_new_keyword = true; + invoke_methods = ["Invoke"]; } let go = { @@ -281,6 +293,7 @@ let go = { ]; constructor_names = []; uses_new_keyword = false; + invoke_methods = []; } let rust = { @@ -304,6 +317,7 @@ let rust = { ]; constructor_names = ["new"]; uses_new_keyword = false; + invoke_methods = []; } let swift = { @@ -328,6 +342,7 @@ let swift = { ]; constructor_names = ["init"]; uses_new_keyword = false; + invoke_methods = []; } let php = { @@ -338,6 +353,7 @@ let php = { collection_configs = []; (* PHP collections are mostly handled via builtin functions *) constructor_names = ["__construct"]; uses_new_keyword = true; + invoke_methods = []; } let cpp = { @@ -348,6 +364,7 @@ let cpp = { collection_configs = []; constructor_names = []; uses_new_keyword = false; 
+ invoke_methods = []; } let c = { @@ -360,6 +377,7 @@ let ocaml_lang = { collection_configs = []; constructor_names = []; uses_new_keyword = false; + invoke_methods = []; } let lua = { @@ -367,6 +385,7 @@ let lua = { collection_configs = []; constructor_names = []; uses_new_keyword = false; + invoke_methods = []; } let dart = { @@ -374,6 +393,7 @@ let dart = { collection_configs = []; constructor_names = ["constructor"]; uses_new_keyword = false; + invoke_methods = []; } let elixir = { @@ -389,6 +409,7 @@ let elixir = { collection_configs = []; constructor_names = []; uses_new_keyword = false; + invoke_methods = []; } let julia = { @@ -398,6 +419,7 @@ let julia = { collection_configs = []; constructor_names = []; uses_new_keyword = false; + invoke_methods = []; } let clojure = { @@ -419,6 +441,7 @@ let clojure = { collection_configs = []; constructor_names = []; uses_new_keyword = false; + invoke_methods = []; } let apex = { @@ -426,6 +449,7 @@ let apex = { collection_configs = []; constructor_names = [""]; uses_new_keyword = true; + invoke_methods = []; } let vb = { @@ -433,6 +457,7 @@ let vb = { collection_configs = []; constructor_names = ["New"]; uses_new_keyword = true; + invoke_methods = []; } (* ========================================================================== *) diff --git a/tests/parsing/clojure/map_destructuring_string_keys.clj b/tests/parsing/clojure/map_destructuring_string_keys.clj new file mode 100644 index 000000000..03900f43e --- /dev/null +++ b/tests/parsing/clojure/map_destructuring_string_keys.clj @@ -0,0 +1,11 @@ +;; Parsing test: map destructuring with string keys. 
+ +(defn f [{x "a"}] x) + +(defn g [{x "a" y "b"}] [x y]) + +(defn h [{x :kw y "str"}] [x y]) + +(let [{x "a"} {"a" 1}] x) + +(defn i [{x "a" :as opts :or {x 0}}] [x opts]) diff --git a/tests/rules/cross_function_tainting/test_hof_callback_taint_ruby.rb b/tests/rules/cross_function_tainting/test_hof_callback_taint_ruby.rb index a68e65a8e..158c8c7d3 100644 --- a/tests/rules/cross_function_tainting/test_hof_callback_taint_ruby.rb +++ b/tests/rules/cross_function_tainting/test_hof_callback_taint_ruby.rb @@ -22,13 +22,12 @@ def app_with_direct_flow(f, x) # === Callback-only HOF tests === def test_callback_only_propagating_lambda() - # todoruleid: test-hof-callback-taint - # TODO: Ruby lambda return propagation is still not modeled reliably here. + # ruleid: test-hof-callback-taint sink(app_callback_only(->(x) { x }, source())) end -# NOTE: Ruby callback-only sanitizing lambdas are still overtainted. -# Keep this disabled until callback return sanitization is implemented. +# NOTE: Ruby lambda callbacks not yet working +# This test would pass for wrong reason - skipping until callbacks work # def test_callback_only_sanitizing_lambda() # # ok: test-hof-callback-taint # sink(app_callback_only(->(x) { "3" }, source())) @@ -37,14 +36,12 @@ def test_callback_only_propagating_lambda() # === Direct flow HOF tests (taint always flows via + x) === def test_direct_flow_propagating_lambda() - # todoruleid: test-hof-callback-taint - # TODO: Ruby lambda callback invocation is still not modeled reliably here. + # ruleid: test-hof-callback-taint sink(app_with_direct_flow(->(x) { x }, source())) end def test_direct_flow_sanitizing_lambda() - # todoruleid: test-hof-callback-taint - # TODO: Ruby lambda callback invocation is still not modeled reliably here. 
+ # ruleid: test-hof-callback-taint sink(app_with_direct_flow(->(x) { "3" }, source())) end diff --git a/tests/rules/cross_function_tainting/test_hof_comprehensive_elixir.ex b/tests/rules/cross_function_tainting/test_hof_comprehensive_elixir.ex index 40ce3f911..bde811e49 100644 --- a/tests/rules/cross_function_tainting/test_hof_comprehensive_elixir.ex +++ b/tests/rules/cross_function_tainting/test_hof_comprehensive_elixir.ex @@ -248,6 +248,27 @@ defmodule TestHOF do # Top-level user-defined HOF custom_for_each(toplevel_items, &toplevel_handler/1) end + + # Remote-capture short lambdas `&Mod.fun/arity`: the left of `/` is a + # dot expression, exercising the ShortLambda conversion path for + # remote dots (FieldAccess) -- local captures `&fn/arity` do not. + def test_remote_capture_builtin() do + arr = [source()] + mapped = Enum.map(arr, &RemoteHelper.process_remote/1) + end + + def test_remote_capture_custom() do + arr = [source()] + mapped = custom_map_builtin(arr, &RemoteHelper.process_remote/1) + end +end + +defmodule RemoteHelper do + def process_remote(x) do + # ruleid: test-hof-taint + sink(x) + x + end end def toplevel_handler(x) do diff --git a/tests/rules/cross_function_tainting/test_invoke_methods_csharp.cs b/tests/rules/cross_function_tainting/test_invoke_methods_csharp.cs new file mode 100644 index 000000000..2aa85eb0a --- /dev/null +++ b/tests/rules/cross_function_tainting/test_invoke_methods_csharp.cs @@ -0,0 +1,31 @@ +// C#: nested lambdas invoked via .Invoke() +class TestInvokeMethods { + + static void test_invoke() { + var x = source(); + Action outer = () => { + Action inner = () => { + // ruleid: test-invoke-methods-csharp + sink(x); + }; + inner.Invoke(); + }; + outer.Invoke(); + } + + // Negative: no taint + static void test_no_taint() { + var x = "clean"; + Action outer = () => { + Action inner = () => { + // ok: test-invoke-methods-csharp + sink(x); + }; + inner.Invoke(); + }; + outer.Invoke(); + } + + static string source() { return 
"tainted"; } + static void sink(string x) {} +} diff --git a/tests/rules/cross_function_tainting/test_invoke_methods_csharp.yaml b/tests/rules/cross_function_tainting/test_invoke_methods_csharp.yaml new file mode 100644 index 000000000..da83a39a0 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_invoke_methods_csharp.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-invoke-methods-csharp + message: Taint flows through lambda invoked via .Invoke() + languages: + - csharp + severity: WARNING + mode: taint + pattern-sources: + - pattern: source(...) + pattern-sinks: + - pattern: sink(...) diff --git a/tests/rules/cross_function_tainting/test_invoke_methods_java.java b/tests/rules/cross_function_tainting/test_invoke_methods_java.java new file mode 100644 index 000000000..a0646b5b4 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_invoke_methods_java.java @@ -0,0 +1,72 @@ +class TestInvokeMethods { + + // Function.apply: nested lambdas invoked via apply + static void test_apply() { + String x = source(); + Function outer = (a) -> { + Function inner = (b) -> { + // ruleid: test-invoke-methods-java + sink(b); + return b; + }; + return inner.apply(a); + }; + outer.apply(x); + } + + // Consumer.accept: nested lambdas invoked via accept + static void test_accept() { + String x = source(); + Consumer outer = (a) -> { + Consumer inner = (b) -> { + // ruleid: test-invoke-methods-java + sink(b); + }; + inner.accept(a); + }; + outer.accept(x); + } + + // Runnable.run: nested lambdas capturing tainted variable + static void test_run() { + String x = source(); + Runnable outer = () -> { + Runnable inner = () -> { + // ruleid: test-invoke-methods-java + sink(x); + }; + inner.run(); + }; + outer.run(); + } + + // Callable.call: nested lambdas capturing tainted variable + static void test_call() { + String x = source(); + Callable outer = () -> { + Callable inner = () -> { + // ruleid: test-invoke-methods-java + sink(x); + return x; + }; + return inner.call(); + }; + 
outer.call(); + } + + // Negative: no taint source, nested + static void test_no_taint() { + String x = "clean"; + Runnable outer = () -> { + Runnable inner = () -> { + // ok: test-invoke-methods-java + sink(x); + }; + inner.run(); + }; + outer.run(); + } + + static String source() { return "tainted"; } + static void sink(String x) {} +} diff --git a/tests/rules/cross_function_tainting/test_invoke_methods_java.yaml b/tests/rules/cross_function_tainting/test_invoke_methods_java.yaml new file mode 100644 index 000000000..e5e81a6ec --- /dev/null +++ b/tests/rules/cross_function_tainting/test_invoke_methods_java.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-invoke-methods-java + message: Taint flows through lambda invoked via functional interface method + languages: + - java + severity: WARNING + mode: taint + pattern-sources: + - pattern: source(...) + pattern-sinks: + - pattern: sink(...) diff --git a/tests/rules/cross_function_tainting/test_invoke_methods_kotlin.kt b/tests/rules/cross_function_tainting/test_invoke_methods_kotlin.kt new file mode 100644 index 000000000..3668a55e8 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_invoke_methods_kotlin.kt @@ -0,0 +1,28 @@ +// Kotlin: nested lambdas invoked via .invoke() +fun test_invoke() { + val x = source() + val outer: () -> Unit = { + val inner: () -> Unit = { + // ruleid: test-invoke-methods-kotlin + sink(x) + } + inner.invoke() + } + outer.invoke() +} + +// Negative: no taint +fun test_no_taint() { + val x = "clean" + val outer: () -> Unit = { + val inner: () -> Unit = { + // ok: test-invoke-methods-kotlin + sink(x) + } + inner.invoke() + } + outer.invoke() +} + +fun source(): String = "tainted" +fun sink(x: String) {} diff --git a/tests/rules/cross_function_tainting/test_invoke_methods_kotlin.yaml b/tests/rules/cross_function_tainting/test_invoke_methods_kotlin.yaml new file mode 100644 index 000000000..498f0d30d --- /dev/null +++ b/tests/rules/cross_function_tainting/test_invoke_methods_kotlin.yaml @@ 
-0,0 +1,11 @@ +rules: + - id: test-invoke-methods-kotlin + message: Taint flows through lambda invoked via .invoke() + languages: + - kotlin + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_invoke_methods_ruby.rb b/tests/rules/cross_function_tainting/test_invoke_methods_ruby.rb new file mode 100644 index 000000000..cc0c051a4 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_invoke_methods_ruby.rb @@ -0,0 +1,32 @@ +# Ruby: nested lambdas invoked via .call() +def test_call() + x = source() + outer = ->(a) { + inner = ->(b) { + # ruleid: test-invoke-methods-ruby + sink(b) + } + inner.call(a) + } + outer.call(x) +end + +# Negative: no taint +def test_no_taint() + x = "clean" + outer = ->() { + inner = ->() { + # ok: test-invoke-methods-ruby + sink(x) + } + inner.call() + } + outer.call() +end + +def source() + "tainted" +end + +def sink(x) +end diff --git a/tests/rules/cross_function_tainting/test_invoke_methods_ruby.yaml b/tests/rules/cross_function_tainting/test_invoke_methods_ruby.yaml new file mode 100644 index 000000000..396ac8f67 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_invoke_methods_ruby.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-invoke-methods-ruby + message: Taint flows through lambda invoked via .call() + languages: + - ruby + severity: WARNING + mode: taint + pattern-sources: + - pattern: source(...) + pattern-sinks: + - pattern: sink(...) 
diff --git a/tests/rules/cross_function_tainting/test_lambda_deeply_nested.go b/tests/rules/cross_function_tainting/test_lambda_deeply_nested.go new file mode 100644 index 000000000..ebf8f12fd --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_deeply_nested.go @@ -0,0 +1,37 @@ +package main + +// Test: Deeply nested lambdas (3 levels) +func test4() { + x := source() + level1 := func() { + level2 := func() { + level3 := func() { + // ruleid: test-lambda-deeply-nested + sink(x) + } + level3() + } + level2() + } + level1() +} + +// Test: Deeply nested lambdas split across functions +func test4_level1(x string) { + level2 := func() { + level3 := func() { + // ruleid: test-lambda-deeply-nested + sink(x) + } + level3() + } + level2() +} + +func test4_caller() { + x := source() + test4_level1(x) +} + +func source() string { return "tainted" } +func sink(s string) {} diff --git a/tests/rules/cross_function_tainting/test_lambda_deeply_nested.yaml b/tests/rules/cross_function_tainting/test_lambda_deeply_nested.yaml new file mode 100644 index 000000000..7ab092a0c --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_deeply_nested.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-deeply-nested + message: Tainted data flows to sink through deeply nested lambdas + languages: + - go + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_lambda_deeply_nested_java.java b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_java.java new file mode 100644 index 000000000..c5d733cab --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_java.java @@ -0,0 +1,38 @@ +// Test: Deeply nested lambdas (3 levels) +class TestLambdaDeeplyNested { + + static void test4() { + String x = source(); + Runnable level1 = () -> { + Runnable level2 = () -> { + Runnable level3 = () -> { + // ruleid: 
test-lambda-deeply-nested-java + sink(x); + }; + level3.run(); + }; + level2.run(); + }; + level1.run(); + } + + // Test: Deeply nested lambdas split across functions + static void test4_level1(String x) { + Runnable level2 = () -> { + Runnable level3 = () -> { + // ruleid: test-lambda-deeply-nested-java + sink(x); + }; + level3.run(); + }; + level2.run(); + } + + static void test4_caller() { + String x = source(); + test4_level1(x); + } + + static String source() { return "tainted"; } + static void sink(String x) {} +} diff --git a/tests/rules/cross_function_tainting/test_lambda_deeply_nested_java.yaml b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_java.yaml new file mode 100644 index 000000000..8a8e8526d --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_java.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-deeply-nested-java + message: Tainted data flows to sink through deeply nested lambdas + languages: + - java + severity: WARNING + mode: taint + pattern-sources: + - pattern: source(...) + pattern-sinks: + - pattern: sink(...) 
diff --git a/tests/rules/cross_function_tainting/test_lambda_deeply_nested_js.js b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_js.js new file mode 100644 index 000000000..df5fe41f8 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_js.js @@ -0,0 +1,35 @@ +// Test: Deeply nested lambdas (3 levels) +function test4() { + let x = source(); + let level1 = () => { + let level2 = () => { + let level3 = () => { + // ruleid: test-lambda-deeply-nested-js + sink(x); + }; + level3(); + }; + level2(); + }; + level1(); +} + +// Test: Deeply nested lambdas split across functions +function test4_level1(x) { + let level2 = () => { + let level3 = () => { + // ruleid: test-lambda-deeply-nested-js + sink(x); + }; + level3(); + }; + level2(); +} + +function test4_caller() { + let x = source(); + test4_level1(x); +} + +function source() { return "tainted"; } +function sink(x) {} diff --git a/tests/rules/cross_function_tainting/test_lambda_deeply_nested_js.yaml b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_js.yaml new file mode 100644 index 000000000..8f30e926c --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_js.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-deeply-nested-js + message: Tainted data flows to sink through deeply nested lambdas + languages: + - javascript + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_lambda_deeply_nested_php.php b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_php.php new file mode 100644 index 000000000..3a3fb6b96 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_php.php @@ -0,0 +1,37 @@ + String { String::from("tainted") } +fn sink(_s: &String) {} diff --git a/tests/rules/cross_function_tainting/test_lambda_deeply_nested_rust.yaml 
b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_rust.yaml new file mode 100644 index 000000000..51e71027a --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_deeply_nested_rust.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-deeply-nested-rust + message: Tainted data flows to sink through deeply nested lambdas + languages: + - rust + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_lambda_in_object_literal.js b/tests/rules/cross_function_tainting/test_lambda_in_object_literal.js new file mode 100644 index 000000000..9268b90b0 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_in_object_literal.js @@ -0,0 +1,15 @@ +// Lambda assigned as a property of an object literal, then called via +// unresolved property access. The parameter `data` matches the source +// pattern (concrete source), so `sink(data)` should fire regardless of +// whether the call graph can resolve `x.success(a)` back to the lambda. + +function test1(a) { + var x = { + url: '/api/settings', + success: function(data) { + // ruleid: taint-func-param + sink(data); + } + }; + x.success(a); +} diff --git a/tests/rules/cross_function_tainting/test_lambda_in_object_literal.yaml b/tests/rules/cross_function_tainting/test_lambda_in_object_literal.yaml new file mode 100644 index 000000000..e88144206 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_in_object_literal.yaml @@ -0,0 +1,13 @@ +rules: + - id: taint-func-param + message: Tainted parameter reaches sink + languages: [javascript] + severity: ERROR + mode: taint + pattern-sources: + - patterns: + - pattern-inside: | + function $X(..., $RES, ...) {...} + - focus-metavariable: $RES + pattern-sinks: + - pattern: sink(...) 
diff --git a/tests/rules/cross_function_tainting/test_lambda_nested_captured.go b/tests/rules/cross_function_tainting/test_lambda_nested_captured.go new file mode 100644 index 000000000..b6de184ca --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_nested_captured.go @@ -0,0 +1,16 @@ +package main + +// Test: Nested lambda capturing parent lambda's parameter +func test2() { + outer := func(a string) { + inner := func() { + // ruleid: test-lambda-nested-captured + sink(a) + } + inner() + } + outer(source()) +} + +func source() string { return "tainted" } +func sink(s string) {} diff --git a/tests/rules/cross_function_tainting/test_lambda_nested_captured.yaml b/tests/rules/cross_function_tainting/test_lambda_nested_captured.yaml new file mode 100644 index 000000000..907f95ee4 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_nested_captured.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-nested-captured + message: Tainted data flows to sink via captured variable from outer lambda + languages: + - go + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_lambda_nested_param.go b/tests/rules/cross_function_tainting/test_lambda_nested_param.go new file mode 100644 index 000000000..4f428a8ed --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_nested_param.go @@ -0,0 +1,16 @@ +package main + +// Test: Nested lambda with param at each level +func test5() { + outer := func(a string) { + inner := func(b string) { + // ruleid: test-lambda-nested-param + sink(b) + } + inner(a) + } + outer(source()) +} + +func source() string { return "tainted" } +func sink(s string) {} diff --git a/tests/rules/cross_function_tainting/test_lambda_nested_param.yaml b/tests/rules/cross_function_tainting/test_lambda_nested_param.yaml new file mode 100644 index 000000000..8f88a8d58 --- /dev/null +++ 
b/tests/rules/cross_function_tainting/test_lambda_nested_param.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-nested-param + message: Tainted data flows to sink through nested lambda parameters + languages: + - go + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_lambda_no_taint.go b/tests/rules/cross_function_tainting/test_lambda_no_taint.go new file mode 100644 index 000000000..60c642065 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_no_taint.go @@ -0,0 +1,14 @@ +package main + +// Test: No taint - should have NO findings +func test6() { + x := "clean" + callback := func() { + // ok: test-lambda-no-taint + sink(x) + } + callback() +} + +func source() string { return "tainted" } +func sink(s string) {} diff --git a/tests/rules/cross_function_tainting/test_lambda_no_taint.yaml b/tests/rules/cross_function_tainting/test_lambda_no_taint.yaml new file mode 100644 index 000000000..7358c0472 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_no_taint.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-no-taint + message: Tainted data flows to sink + languages: + - go + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_lambda_param_flow.go b/tests/rules/cross_function_tainting/test_lambda_param_flow.go new file mode 100644 index 000000000..fed615ee8 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_param_flow.go @@ -0,0 +1,13 @@ +package main + +// Test: Lambda parameter receives taint at call site +func test3() { + callback := func(x string) { + // ruleid: test-lambda-param-flow + sink(x) + } + callback(source()) +} + +func source() string { return "tainted" } +func sink(s string) {} diff --git a/tests/rules/cross_function_tainting/test_lambda_param_flow.yaml 
b/tests/rules/cross_function_tainting/test_lambda_param_flow.yaml new file mode 100644 index 000000000..7ca1ba90e --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_param_flow.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-param-flow + message: Tainted data flows to sink via lambda parameter + languages: + - go + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_lambda_simple_captured.go b/tests/rules/cross_function_tainting/test_lambda_simple_captured.go new file mode 100644 index 000000000..9f0125f11 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_simple_captured.go @@ -0,0 +1,14 @@ +package main + +// Test: Simple lambda with captured variable +func test1() { + x := source() + callback := func() { + // ruleid: test-lambda-simple-captured + sink(x) + } + callback() +} + +func source() string { return "tainted" } +func sink(s string) {} diff --git a/tests/rules/cross_function_tainting/test_lambda_simple_captured.yaml b/tests/rules/cross_function_tainting/test_lambda_simple_captured.yaml new file mode 100644 index 000000000..b24076992 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_lambda_simple_captured.yaml @@ -0,0 +1,11 @@ +rules: + - id: test-lambda-simple-captured + message: Tainted data flows to sink via captured variable in lambda + languages: + - go + severity: WARNING + mode: taint + pattern-sources: + - pattern: source() + pattern-sinks: + - pattern: sink($X) diff --git a/tests/rules/cross_function_tainting/test_ruby_chained_method.rb b/tests/rules/cross_function_tainting/test_ruby_chained_method.rb new file mode 100644 index 000000000..8ca291a22 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_ruby_chained_method.rb @@ -0,0 +1,20 @@ +# Test that taint flows through chained method calls where the receiver +# is itself a method call: get_data.strip should call get_data() first. 
+ +class Controller + def show + # ruleid: test-ruby-chained-method + sink(get_data.strip) + end + + def get_data + source() + end +end + +def source() + "tainted" +end + +def sink(x) +end diff --git a/tests/rules/cross_function_tainting/test_ruby_chained_method.yaml b/tests/rules/cross_function_tainting/test_ruby_chained_method.yaml new file mode 100644 index 000000000..56b1e8436 --- /dev/null +++ b/tests/rules/cross_function_tainting/test_ruby_chained_method.yaml @@ -0,0 +1,13 @@ +rules: + - id: test-ruby-chained-method + message: taint through chained method call + languages: + - ruby + severity: WARNING + mode: taint + options: + taint_intrafile: true + pattern-sources: + - pattern: source(...) + pattern-sinks: + - pattern: sink(...) diff --git a/tests/rules/cross_function_tainting/test_same_name_functions.go b/tests/rules/cross_function_tainting/test_same_name_functions.go index 7b9c850d3..875f558cd 100644 --- a/tests/rules/cross_function_tainting/test_same_name_functions.go +++ b/tests/rules/cross_function_tainting/test_same_name_functions.go @@ -11,6 +11,7 @@ func test(input string) { func test(input string) { var fn = func(s string) { // ok: taint-func-param - sink(s) + safe(s) } + fn("") } diff --git a/tests/rules/taint_param_source3.js b/tests/rules/taint_param_source3.js index 39cff30b4..a0e80842b 100644 --- a/tests/rules/taint_param_source3.js +++ b/tests/rules/taint_param_source3.js @@ -4,8 +4,7 @@ function test() { req.on('data', function (chunk) { buf += chunk }); - // todoruleid: test - // TODO: callback writes through captured outer variables are not modeled yet. 
+ // ruleid: test sink(buf); }; } diff --git a/tests/tainting_rules/clojure/taint-propagation.clj b/tests/tainting_rules/clojure/taint-propagation.clj index 72b07b076..6c84449ac 100644 --- a/tests/tainting_rules/clojure/taint-propagation.clj +++ b/tests/tainting_rules/clojure/taint-propagation.clj @@ -101,6 +101,20 @@ ;; ruleid: taint-call (sink y1)) +;; map destructuring with string keys +(defn f [{x "a"}] + ;; ruleid: taint-call + (sink x)) + +(defn f [{x "a" y "b"}] + ;; ruleid: taint-call + (sink y)) + +;; mixed keyword and string keys +(defn f [{x :kw y "str"}] + ;; ruleid: taint-call + (sink y)) + (defn f [{:syms [::x y] :as opts}] (if opts ;; ruleid: taint-call diff --git a/tests/tainting_rules/elixir/taint-field-access.ex b/tests/tainting_rules/elixir/taint-field-access.ex new file mode 100644 index 000000000..37b174775 --- /dev/null +++ b/tests/tainting_rules/elixir/taint-field-access.ex @@ -0,0 +1,42 @@ +defmodule TaintFieldAccess do + # Field access `x.y` (no parens, no args, no do-block) must propagate + # taint even when `taint_assume_safe_functions: true`, because it is + # map/struct field access, not a zero-arity function call. + def field(upload) do + # ruleid: taint-field-access + sink(upload.path) + end + + # Chained field access. + def chained(conn) do + # ruleid: taint-field-access + sink(conn.assigns.current_user) + end + + # Zero-arity remote call `x.y()` IS a function call; under + # `taint_assume_safe_functions: true` the taint is dropped. + def zero_arity_call(upload) do + # ok: taint-field-access + sink(upload.path()) + end + + # Remote call with args — also a call, taint dropped. + def call_with_args(upload) do + # ok: taint-field-access + sink(upload.compute(1, 2)) + end + + # Remote call without parens but with args — still a call. + def call_no_parens_args(upload) do + # ok: taint-field-access + sink(upload.compute 1, 2) + end + + # Remote call with do-block — a call. 
+ def call_do_block(upload) do + # ok: taint-field-access + sink(upload.with_block do + :ok + end) + end +end diff --git a/tests/tainting_rules/elixir/taint-field-access.yaml b/tests/tainting_rules/elixir/taint-field-access.yaml new file mode 100644 index 000000000..ac5375bea --- /dev/null +++ b/tests/tainting_rules/elixir/taint-field-access.yaml @@ -0,0 +1,17 @@ +rules: +- id: taint-field-access + mode: taint + languages: [elixir] + message: "tainted field access reaches sink" + severity: INFO + options: + taint_assume_safe_functions: true + pattern-sources: + - patterns: + - pattern-inside: | + def $_(..., $P, ...) do + ... + end + - focus-metavariable: $P + pattern-sinks: + - pattern: sink(...) diff --git a/tests/tainting_rules/elixir/taint-pin-pattern.ex b/tests/tainting_rules/elixir/taint-pin-pattern.ex new file mode 100644 index 000000000..6c491d631 --- /dev/null +++ b/tests/tainting_rules/elixir/taint-pin-pattern.ex @@ -0,0 +1,7 @@ +def foo() do + val x = source() + case foo() do + #ruleid: taint + ^x -> sink(x) + end +end diff --git a/tests/tainting_rules/elixir/taint-pin-pattern.yaml b/tests/tainting_rules/elixir/taint-pin-pattern.yaml new file mode 100644 index 000000000..0757e34dd --- /dev/null +++ b/tests/tainting_rules/elixir/taint-pin-pattern.yaml @@ -0,0 +1,12 @@ +rules: +- id: taint + languages: [elixir] + message: "tainted data reached sink" + severity: ERROR + mode: taint + pattern-sources: + - pattern: | + source() + pattern-sinks: + - pattern: | + sink(...) 
diff --git a/tests/tainting_rules/elixir/taint-string-concat-pattern.ex b/tests/tainting_rules/elixir/taint-string-concat-pattern.ex new file mode 100644 index 000000000..749e1c572 --- /dev/null +++ b/tests/tainting_rules/elixir/taint-string-concat-pattern.ex @@ -0,0 +1,4 @@ +def foo("a" <> x) do + #ruleid: taint + sink(x) +end diff --git a/tests/tainting_rules/elixir/taint-string-concat-pattern.yaml b/tests/tainting_rules/elixir/taint-string-concat-pattern.yaml new file mode 100644 index 000000000..58eb1c4c6 --- /dev/null +++ b/tests/tainting_rules/elixir/taint-string-concat-pattern.yaml @@ -0,0 +1,17 @@ +rules: +- id: taint + languages: [elixir] + message: "tainted data reached sink" + severity: ERROR + mode: taint + pattern-sources: + - patterns: + - pattern-either: + - pattern-inside: | + def $_(..., $X, ...) do + ... + end + - focus-metavariable: $X + pattern-sinks: + - pattern: | + sink(...)