Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 39 additions & 8 deletions scripts/version_scanner/tests/unit/test_version_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,9 @@ def test_write_csv_report(tmp_path):
"rule_name": "python_requires_check",
"line_number": 1,
"matched_string": "python_requires = '>=3.7'",
"context_line": "python_requires = '>=3.7'"
"context_line": "python_requires = '>=3.7'",
"dependency": "python",
"version": "3.7"
}
]

Expand All @@ -164,11 +166,14 @@ def test_write_csv_report(tmp_path):
rows = list(reader)

assert len(rows) == 1
assert rows[0]["file_name"] == "setup.py"
assert rows[0]["file_path"] == "./setup.py"
assert rows[0]["rule_name"] == "python_requires_check"
assert rows[0]["line_number"] == "1"
assert rows[0]["matched_string"] == "python_requires = '>=3.7'"
assert rows[0]["context_line"] == "python_requires = '>=3.7'"
assert rows[0]["dependency"] == "python"
assert rows[0]["version"] == "3.7"


def test_load_config(tmp_path):
Expand Down Expand Up @@ -227,7 +232,6 @@ def test_main_package_file_permission_error(tmp_path, capsys):
package_file = tmp_path / "packages.txt"
package_file.write_text("packages/pkg_a")

import sys
test_args = ["version_scanner.py", "-d", "python", "-v", "3.7", "--package-file", str(package_file)]

real_open = open
Expand All @@ -246,7 +250,6 @@ def side_effect(file, *args, **kwargs):
captured = capsys.readouterr()
assert "Error: Permission denied reading package file" in captured.err
def test_main_package_file_not_found(capsys):
import sys
test_args = ["version_scanner.py", "-d", "python", "-v", "3.7", "--package-file", "non_existent_file.txt"]

with patch("sys.argv", test_args):
Expand Down Expand Up @@ -323,7 +326,6 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore):
mock_load_ignore.return_value = []
mock_scan.return_value = []

import sys
test_args = ["version_scanner.py", "-d", "python", "-v", "3.7"]

with mock.patch('sys.argv', test_args):
Expand All @@ -339,7 +341,8 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore):


# Detect whether the optional Google API client library is installed.
# HAS_GOOGLE_API records the result as a plain boolean.
try:
    # Ruff linter F401: Imported solely to detect Google API Client library presence for test skipping
    import googleapiclient  # noqa: F401
except ImportError:
    HAS_GOOGLE_API = False
else:
    HAS_GOOGLE_API = True
Expand Down Expand Up @@ -392,7 +395,7 @@ def test_upload_to_drive(mock_auth, mock_build):
body = kwargs.get('body', {})
values = body.get('values', [])
assert len(values) > 1
assert "HYPERLINK" in values[1][3] # line_number is at index 3
assert "HYPERLINK" in values[1][6] # line_number is at index 6


def test_regex_examples_from_config():
Expand Down Expand Up @@ -638,39 +641,67 @@ def test_format_for_raw_csv_handles_empty_line_number():

def test_format_for_raw_csv():
    """Check that format_for_raw_csv returns the expected raw column values.

    The input carries ``line_number`` as a string; the formatted row is
    expected to hold it as an int, and ``matched_string`` must come back
    without any spreadsheet formula wrapping.
    """
    match = {
        "file_name": "setup.py",
        "file_path": "google-cloud-python/main/packages/pkg_a/setup.py",
        "repo_path": "packages/pkg_a/setup.py",
        "package_name": "pkg_a",
        "rule_name": "python_requires_check",
        "line_number": "123",
        "matched_string": "3.7",
        "context_line": "python_requires = '>=3.7'",
        "dependency": "python",
        "version": "3.7",
    }

    formatted = format_for_raw_csv(match)

    assert formatted["file_name"] == "setup.py"
    assert formatted["file_path"] == "google-cloud-python/main/packages/pkg_a/setup.py"
    assert formatted["package_name"] == "pkg_a"
    assert formatted["rule_name"] == "python_requires_check"
    assert formatted["line_number"] == 123  # Int conversion
    assert formatted["matched_string"] == "3.7"  # No formula wrapping
    assert formatted["context_line"] == "python_requires = '>=3.7'"
    assert formatted["dependency"] == "python"
    assert formatted["version"] == "3.7"

def test_format_for_raw_csv_fallback_filename():
    """A match without ``file_name`` gets it from the last part of ``file_path``."""
    # Deliberately omit the "file_name" key so the fallback path is exercised.
    record_without_name = {
        "file_path": "google-cloud-python/main/packages/pkg_a/setup.py",
        "repo_path": "packages/pkg_a/setup.py",
        "package_name": "pkg_a",
        "rule_name": "python_requires_check",
        "line_number": "123",
        "matched_string": "3.7",
        "context_line": "python_requires = '>=3.7'",
        "dependency": "python",
        "version": "3.7",
    }

    row = format_for_raw_csv(record_without_name)

    assert row["file_name"] == "setup.py"

def test_format_for_spreadsheet():
match = {
"file_name": "setup.py",
"file_path": "google-cloud-python/main/packages/pkg_a/setup.py",
"repo_path": "packages/pkg_a/setup.py",
"package_name": "pkg_a",
"rule_name": "python_requires_check",
"line_number": 123,
"matched_string": "3.7",
"context_line": "python_requires = '>=3.7'"
"context_line": "python_requires = '>=3.7'",
"dependency": "python",
"version": "3.7"
}

# Without github_repo
formatted_no_repo = format_for_spreadsheet(match)
assert formatted_no_repo["file_name"] == "setup.py"
assert formatted_no_repo["line_number"] == 123
assert formatted_no_repo["matched_string"] == '="3.7"' # Decimal protection formula
assert formatted_no_repo["dependency"] == "python"
assert formatted_no_repo["version"] == "3.7"

# With github_repo
formatted_repo = format_for_spreadsheet(match, github_repo="https://github.com/user/repo", branch="main")
Expand Down
37 changes: 23 additions & 14 deletions scripts/version_scanner/version_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def load_config(self) -> List[Dict[str, str]]:

return resolved_rules

def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> List[Dict[str, str]]:
def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> List[Dict[str, Any]]:
"""
Scan a single file for matching patterns.

Expand Down Expand Up @@ -239,23 +239,29 @@ def _safe_int(value: Any, default: int = 0) -> int:
return default


def format_for_raw_csv(match: Dict[str, Any]) -> Dict[str, Any]:
    """Prepares a full raw dataset (n + x columns) with clean text values.

    Args:
        match: A single match record. Missing keys fall back to an empty
            string (``line_number`` goes through ``_safe_int``).

    Returns:
        A new dictionary with the keys ``file_name``, ``file_path``,
        ``package_name``, ``rule_name``, ``line_number``, ``matched_string``,
        ``context_line``, ``dependency`` and ``version``.
    """
    file_path = match.get("file_path", "")
    matched_string = match.get("matched_string", "")

    # Fall back to the last component of file_path when the record carries
    # no usable file_name of its own.
    file_name = match.get("file_name")
    if not file_name and file_path:
        file_name = os.path.basename(file_path)

    return {
        "file_name": file_name or "",
        "file_path": file_path,
        "package_name": match.get("package_name", ""),
        "rule_name": match.get("rule_name", ""),
        "line_number": _safe_int(match.get("line_number")),
        "matched_string": matched_string,
        "context_line": _truncate_context(match.get("context_line", ""), matched_string),
        "dependency": match.get("dependency", ""),
        "version": match.get("version", ""),
    }


def format_for_spreadsheet(
match: Dict[str, str],
match: Dict[str, Any],
github_repo: str = None,
branch: str = "main"
) -> Dict[str, str]:
) -> Dict[str, Any]:
"""Builds on top of raw CSV but applies Sheets-specific formulas."""
formatted = format_for_raw_csv(match)

Expand All @@ -270,7 +276,7 @@ def format_for_spreadsheet(
return formatted


def format_for_console(match: Dict[str, str]) -> str:
def format_for_console(match: Dict[str, Any]) -> str:
"""Prepares a slim, readable string representation (n columns) for stdout/logs."""
file_path = match.get("file_path", "")
line_number = match.get("line_number", "")
Expand All @@ -280,7 +286,7 @@ def format_for_console(match: Dict[str, str]) -> str:



def get_match_counts(matches: List[Dict[str, str]]) -> Tuple[Dict[str, int], Dict[str, int]]:
def get_match_counts(matches: List[Dict[str, Any]]) -> Tuple[Dict[str, int], Dict[str, int]]:
"""
Aggregate matches by rule and by package.
"""
Expand Down Expand Up @@ -333,7 +339,7 @@ def load_ignore_file(file_path: str) -> List[str]:

def write_csv_report(
output_path: str,
matches: List[Dict[str, str]]
matches: List[Dict[str, Any]]
) -> None:
"""
Write the collected matches to a CSV file.
Expand All @@ -342,7 +348,7 @@ def write_csv_report(
output_path: Path to the output CSV file.
matches: A list of dictionaries containing match details.
"""
fieldnames = ["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"]
fieldnames = ["file_name", "file_path", "package_name", "rule_name", "dependency", "version", "line_number", "matched_string", "context_line"]

try:
with open(output_path, 'w', encoding='utf-8', newline='') as f:
Expand All @@ -360,7 +366,7 @@ def write_csv_report(
print(f"Error writing CSV report: {e}", file=sys.stderr)


def upload_to_drive(csv_path: str, matches: List[Dict[str, str]], github_repo: str = None, branch: str = "main") -> str:
def upload_to_drive(csv_path: str, matches: List[Dict[str, Any]], github_repo: str = None, branch: str = "main") -> str:
"""
Upload matches to a Google Sheet in Drive.
"""
Expand Down Expand Up @@ -391,13 +397,16 @@ def upload_to_drive(csv_path: str, matches: List[Dict[str, str]], github_repo: s
spreadsheet_id = spreadsheet.get('spreadsheetId')

# Prepare data
values = [["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"]]
values = [["file_name", "file_path", "package_name", "rule_name", "dependency", "version", "line_number", "matched_string", "context_line"]]
for m in matches:
formatted_m = format_for_spreadsheet(m, github_repo=github_repo, branch=branch)
values.append([
formatted_m.get("file_name", ""),
formatted_m.get("file_path", ""),
formatted_m.get("package_name", ""),
formatted_m.get("rule_name", ""),
formatted_m.get("dependency", ""),
formatted_m.get("version", ""),
str(formatted_m.get("line_number", "")),
formatted_m.get("matched_string", ""),
formatted_m.get("context_line", "")
Expand Down Expand Up @@ -460,7 +469,7 @@ def scan_repository(
target_packages: List[str] = None,
ignore_dirs: List[str] = None,
version_string: str = None
) -> List[Dict[str, str]]:
) -> List[Dict[str, Any]]:
"""
Scans the repository directory tree applying resolved regex patterns to files.

Expand Down Expand Up @@ -510,7 +519,6 @@ def scan_repository(
files = [f for f in files if f.lower() not in ignore_lower]

rel_root = os.path.relpath(root, root_path)
parts = rel_root.split(os.sep)

# Layout-agnostic generic subdirectory filtering
if target_packages:
Expand Down Expand Up @@ -559,6 +567,7 @@ def scan_repository(
display_path = rel_file_path

for m in matches:
m["file_name"] = file
m["file_path"] = display_path
m["repo_path"] = rel_file_path
m["package_name"] = package_name
Expand Down Expand Up @@ -663,7 +672,7 @@ def main():

print(f"Starting scan for dependency: {args.dependency} version: {args.version}")
print(f"Root path: {args.path}")
print(f"Targets to scan:")
print("Targets to scan:")
if target_packages:
for pkg in target_packages:
print(f" - {os.path.join(args.path, pkg)}")
Expand Down
Loading