diff --git a/scripts/version_scanner/tests/unit/test_version_scanner.py b/scripts/version_scanner/tests/unit/test_version_scanner.py index f5a909e849e8..f887dfc12fd4 100644 --- a/scripts/version_scanner/tests/unit/test_version_scanner.py +++ b/scripts/version_scanner/tests/unit/test_version_scanner.py @@ -151,7 +151,9 @@ def test_write_csv_report(tmp_path): "rule_name": "python_requires_check", "line_number": 1, "matched_string": "python_requires = '>=3.7'", - "context_line": "python_requires = '>=3.7'" + "context_line": "python_requires = '>=3.7'", + "dependency": "python", + "version": "3.7" } ] @@ -164,11 +166,14 @@ def test_write_csv_report(tmp_path): rows = list(reader) assert len(rows) == 1 + assert rows[0]["file_name"] == "setup.py" assert rows[0]["file_path"] == "./setup.py" assert rows[0]["rule_name"] == "python_requires_check" assert rows[0]["line_number"] == "1" assert rows[0]["matched_string"] == "python_requires = '>=3.7'" assert rows[0]["context_line"] == "python_requires = '>=3.7'" + assert rows[0]["dependency"] == "python" + assert rows[0]["version"] == "3.7" def test_load_config(tmp_path): @@ -227,7 +232,6 @@ def test_main_package_file_permission_error(tmp_path, capsys): package_file = tmp_path / "packages.txt" package_file.write_text("packages/pkg_a") - import sys test_args = ["version_scanner.py", "-d", "python", "-v", "3.7", "--package-file", str(package_file)] real_open = open @@ -246,7 +250,6 @@ def side_effect(file, *args, **kwargs): captured = capsys.readouterr() assert "Error: Permission denied reading package file" in captured.err def test_main_package_file_not_found(capsys): - import sys test_args = ["version_scanner.py", "-d", "python", "-v", "3.7", "--package-file", "non_existent_file.txt"] with patch("sys.argv", test_args): @@ -323,7 +326,6 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore): mock_load_ignore.return_value = [] mock_scan.return_value = [] - import sys test_args = ["version_scanner.py", "-d", "python", "-v", "3.7"] with mock.patch('sys.argv', test_args): @@ -339,7 +341,8 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore): try: - import googleapiclient + # Ruff linter F401: Imported solely to detect Google API Client library presence for test skipping + import googleapiclient # noqa: F401 HAS_GOOGLE_API = True except ImportError: HAS_GOOGLE_API = False @@ -392,7 +395,7 @@ def test_upload_to_drive(mock_auth, mock_build): body = kwargs.get('body', {}) values = body.get('values', []) assert len(values) > 1 - assert "HYPERLINK" in values[1][3] # line_number is at index 3 + assert "HYPERLINK" in values[1][6] # line_number is at index 6 def test_regex_examples_from_config(): @@ -638,39 +641,67 @@ def test_format_for_raw_csv_handles_empty_line_number(): def test_format_for_raw_csv(): match = { + "file_name": "setup.py", "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", "repo_path": "packages/pkg_a/setup.py", "package_name": "pkg_a", "rule_name": "python_requires_check", "line_number": "123", "matched_string": "3.7", - "context_line": "python_requires = '>=3.7'" + "context_line": "python_requires = '>=3.7'", + "dependency": "python", + "version": "3.7" } formatted = format_for_raw_csv(match) + assert formatted["file_name"] == "setup.py" assert formatted["file_path"] == "google-cloud-python/main/packages/pkg_a/setup.py" assert formatted["package_name"] == "pkg_a" assert formatted["rule_name"] == "python_requires_check" assert formatted["line_number"] == 123 # Int conversion assert formatted["matched_string"] == "3.7" # No formula wrapping assert formatted["context_line"] == "python_requires = '>=3.7'" + assert formatted["dependency"] == "python" + assert formatted["version"] == "3.7" + +def test_format_for_raw_csv_fallback_filename(): + match = { + "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", + "repo_path": "packages/pkg_a/setup.py", + "package_name": "pkg_a", + "rule_name": "python_requires_check", + "line_number": "123", + "matched_string": "3.7", + "context_line": "python_requires = '>=3.7'", + "dependency": "python", + "version": "3.7" + } + + formatted = format_for_raw_csv(match) + assert formatted["file_name"] == "setup.py" def test_format_for_spreadsheet(): match = { + "file_name": "setup.py", "file_path": "google-cloud-python/main/packages/pkg_a/setup.py", "repo_path": "packages/pkg_a/setup.py", "package_name": "pkg_a", "rule_name": "python_requires_check", "line_number": 123, "matched_string": "3.7", - "context_line": "python_requires = '>=3.7'" + "context_line": "python_requires = '>=3.7'", + "dependency": "python", + "version": "3.7" } # Without github_repo formatted_no_repo = format_for_spreadsheet(match) + assert formatted_no_repo["file_name"] == "setup.py" assert formatted_no_repo["line_number"] == 123 assert formatted_no_repo["matched_string"] == '="3.7"' # Decimal protection formula + assert formatted_no_repo["dependency"] == "python" + assert formatted_no_repo["version"] == "3.7" # With github_repo formatted_repo = format_for_spreadsheet(match, github_repo="https://github.com/user/repo", branch="main") diff --git a/scripts/version_scanner/version_scanner.py b/scripts/version_scanner/version_scanner.py index 90234a967665..484a6eacacae 100644 --- a/scripts/version_scanner/version_scanner.py +++ b/scripts/version_scanner/version_scanner.py @@ -146,7 +146,7 @@ def load_config(self) -> List[Dict[str, str]]: return resolved_rules -def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> List[Dict[str, str]]: +def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> List[Dict[str, Any]]: """ Scan a single file for matching patterns. @@ -239,23 +239,29 @@ def _safe_int(value: Any, default: int = 0) -> int: return default -def format_for_raw_csv(match: Dict[str, str]) -> Dict[str, str]: +def format_for_raw_csv(match: Dict[str, Any]) -> Dict[str, Any]: """Prepares a full raw dataset (n + x columns) with clean text values.""" + file_name = match.get("file_name") + if not file_name and match.get("file_path"): + file_name = os.path.basename(match.get("file_path")) return { + "file_name": file_name or "", "file_path": match.get("file_path", ""), "package_name": match.get("package_name", ""), "rule_name": match.get("rule_name", ""), "line_number": _safe_int(match.get("line_number")), "matched_string": match.get("matched_string", ""), - "context_line": _truncate_context(match.get("context_line", ""), match.get("matched_string", "")) + "context_line": _truncate_context(match.get("context_line", ""), match.get("matched_string", "")), + "dependency": match.get("dependency", ""), + "version": match.get("version", "") } def format_for_spreadsheet( - match: Dict[str, str], + match: Dict[str, Any], github_repo: str = None, branch: str = "main" -) -> Dict[str, str]: +) -> Dict[str, Any]: """Builds on top of raw CSV but applies Sheets-specific formulas.""" formatted = format_for_raw_csv(match) @@ -270,7 +276,7 @@ def format_for_spreadsheet( return formatted -def format_for_console(match: Dict[str, str]) -> str: +def format_for_console(match: Dict[str, Any]) -> str: """Prepares a slim, readable string representation (n columns) for stdout/logs.""" file_path = match.get("file_path", "") line_number = match.get("line_number", "") @@ -280,7 +286,7 @@ def format_for_console(match: Dict[str, str]) -> str: -def get_match_counts(matches: List[Dict[str, str]]) -> Tuple[Dict[str, int], Dict[str, int]]: +def get_match_counts(matches: List[Dict[str, Any]]) -> Tuple[Dict[str, int], Dict[str, int]]: """ Aggregate matches by rule and by package. """ @@ -333,7 +339,7 @@ def load_ignore_file(file_path: str) -> List[str]: def write_csv_report( output_path: str, - matches: List[Dict[str, str]] + matches: List[Dict[str, Any]] ) -> None: """ Write the collected matches to a CSV file. @@ -342,7 +348,7 @@ def write_csv_report( output_path: Path to the output CSV file. matches: A list of dictionaries containing match details. """ - fieldnames = ["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"] + fieldnames = ["file_name", "file_path", "package_name", "rule_name", "dependency", "version", "line_number", "matched_string", "context_line"] try: with open(output_path, 'w', encoding='utf-8', newline='') as f: @@ -360,7 +366,7 @@ def write_csv_report( print(f"Error writing CSV report: {e}", file=sys.stderr) -def upload_to_drive(csv_path: str, matches: List[Dict[str, str]], github_repo: str = None, branch: str = "main") -> str: +def upload_to_drive(csv_path: str, matches: List[Dict[str, Any]], github_repo: str = None, branch: str = "main") -> str: """ Upload matches to a Google Sheet in Drive. """ @@ -391,13 +397,16 @@ def upload_to_drive(csv_path: str, matches: List[Dict[str, str]], github_repo: s spreadsheet_id = spreadsheet.get('spreadsheetId') # Prepare data - values = [["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"]] + values = [["file_name", "file_path", "package_name", "rule_name", "dependency", "version", "line_number", "matched_string", "context_line"]] for m in matches: formatted_m = format_for_spreadsheet(m, github_repo=github_repo, branch=branch) values.append([ + formatted_m.get("file_name", ""), formatted_m.get("file_path", ""), formatted_m.get("package_name", ""), formatted_m.get("rule_name", ""), + formatted_m.get("dependency", ""), + formatted_m.get("version", ""), str(formatted_m.get("line_number", "")), formatted_m.get("matched_string", ""), formatted_m.get("context_line", "") @@ -460,7 +469,7 @@ def scan_repository( target_packages: List[str] = None, ignore_dirs: List[str] = None, version_string: str = None -) -> List[Dict[str, str]]: +) -> List[Dict[str, Any]]: """ Scans the repository directory tree applying resolved regex patterns to files. @@ -510,7 +519,6 @@ def scan_repository( files = [f for f in files if f.lower() not in ignore_lower] rel_root = os.path.relpath(root, root_path) - parts = rel_root.split(os.sep) # Layout-agnostic generic subdirectory filtering if target_packages: @@ -559,6 +567,7 @@ def scan_repository( display_path = rel_file_path for m in matches: + m["file_name"] = file m["file_path"] = display_path m["repo_path"] = rel_file_path m["package_name"] = package_name @@ -663,7 +672,7 @@ def main(): print(f"Starting scan for dependency: {args.dependency} version: {args.version}") print(f"Root path: {args.path}") - print(f"Targets to scan:") + print("Targets to scan:") if target_packages: for pkg in target_packages: print(f" - {os.path.join(args.path, pkg)}")