Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions scripts/version_scanner/tests/unit/test_version_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,3 +782,90 @@ def test_format_for_console():
assert "3.7" in log_str
assert "python_requires = " not in log_str # Slim format doesn't print context line


def test_parse_targets_inline_json():
from version_scanner import parse_targets
json_str = '{"python": ["3.7", "3.8"], "protobuf": "4.25.8"}'
targets = parse_targets(json_str)
assert targets == [("python", "3.7"), ("python", "3.8"), ("protobuf", "4.25.8")]

def test_parse_targets_inline_yaml():
from version_scanner import parse_targets
yaml_str = """
python:
- "3.7"
- "3.8"
protobuf: "4.25.8"
"""
targets = parse_targets(yaml_str)
assert targets == [("python", "3.7"), ("python", "3.8"), ("protobuf", "4.25.8")]

def test_parse_targets_from_file(tmp_path):
from version_scanner import parse_targets
yaml_file = tmp_path / "targets.yaml"
yaml_file.write_text("""
python:
- "3.7"
- "3.8"
protobuf: "4.25.8"
""")
targets = parse_targets(str(yaml_file))
assert targets == [("python", "3.7"), ("python", "3.8"), ("protobuf", "4.25.8")]

def test_parse_targets_invalid_syntax():
from version_scanner import parse_targets
with pytest.raises(SystemExit) as excinfo:
parse_targets('{"invalid"')
assert excinfo.value.code == 1

def test_scan_repository_multi_targets(tmp_path):
# Setup files in tmp repository
file1 = tmp_path / "packages" / "pkg1" / "setup.py"
file1.parent.mkdir(parents=True)
file1.write_text("python_requires = '>=3.7'\n")

file2 = tmp_path / "packages" / "pkg2" / "requirements.txt"
file2.parent.mkdir(parents=True)
file2.write_text("protobuf==4.25.8\n")

# Let's mock a config file with rules for both python and protobuf
config_file = tmp_path / "regex_config.yaml"
config_file.write_text("""
rules:
- name: python_requires_check
applies_to:
- python
rules:
- python_requires\\s*=\\s*['\"]>={version}['\"]
- name: protobuf_check
applies_to:
- protobuf
rules:
- protobuf=={version}
""")

from version_scanner import ConfigManager, scan_repository

targets = [("python", "3.7"), ("protobuf", "4.25.8")]
rules = []
for dep, ver in targets:
cm = ConfigManager(str(config_file), dep, ver)
rules.extend(cm.load_config())

results = scan_repository(str(tmp_path), rules, targets=targets)

# We should have 2 matches
assert len(results) == 2

# Match for python
python_match = [r for r in results if r["dependency"] == "python"]
assert len(python_match) == 1
assert python_match[0]["version"] == "3.7"
assert python_match[0]["rule_name"] == "python_requires_check"

# Match for protobuf
protobuf_match = [r for r in results if r["dependency"] == "protobuf"]
assert len(protobuf_match) == 1
assert protobuf_match[0]["version"] == "4.25.8"
assert protobuf_match[0]["rule_name"] == "protobuf_check"

142 changes: 121 additions & 21 deletions scripts/version_scanner/version_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ def load_config(self) -> List[Dict[str, str]]:
resolved_pattern = template.strip().format(**self.variables)
resolved_rules.append({
"name": name,
"pattern": resolved_pattern
"pattern": resolved_pattern,
"dependency": self.dependency,
"version": self.version
})
except KeyError as e:
print(f"Warning: Missing variable for interpolation in rule {name}: {e}", file=sys.stderr)
Expand Down Expand Up @@ -178,7 +180,9 @@ def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> Li
"rule_name": rule["name"],
"line_number": line_num,
"matched_string": match.group(0).strip(),
"context_line": line.strip()
"context_line": line.strip(),
"dependency": rule.get("dependency", ""),
"version": rule.get("version", "")
})
except IOError as e:
print(f"Warning: Could not read file {file_path}: {e}", file=sys.stderr)
Expand Down Expand Up @@ -465,10 +469,11 @@ def read_package_file(file_path: str) -> List[str]:

def scan_repository(
root_path: str,
rules: List[Dict[str, str]],
rules: List[Dict[str, Any]],
target_packages: List[str] = None,
ignore_dirs: List[str] = None,
version_string: str = None
version_string: str = None,
targets: List[Tuple[str, str]] = None
) -> List[Dict[str, Any]]:
"""
Scans the repository directory tree applying resolved regex patterns to files.
Expand All @@ -487,21 +492,30 @@ def scan_repository(
performs a full recursive scan of the repository.
ignore_dirs: Optional list of directory names or glob-like files to ignore (case-insensitive).
version_string: Optional target version string (e.g. "3.7") to scan for in filenames.
targets: Optional list of (dependency, version) tuples.

Returns:
A list of dictionaries detailing each match: 'file_path', 'repo_path',
'package_name', 'rule_name', 'line_number', 'matched_string', 'context_line'.
A list of dictionaries detailing each match.
"""
ignore_lower = {i.lower() for i in ignore_dirs} if ignore_dirs else set()
results = []

filename_targets = []
if targets:
filename_targets = targets
elif version_string:
dep = rules[0].get("dependency") if rules else None
filename_targets = [(dep, version_string)]

# Compile patterns once here
compiled_rules = []
for rule in rules:
try:
compiled_rules.append({
"name": rule["name"],
"pattern": re.compile(rule["pattern"], re.IGNORECASE)
"pattern": re.compile(rule["pattern"], re.IGNORECASE),
"dependency": rule.get("dependency", ""),
"version": rule.get("version", "")
})
except re.error as e:
print(f"Error compiling regex for rule {rule['name']}: {e}", file=sys.stderr)
Expand Down Expand Up @@ -541,13 +555,16 @@ def scan_repository(
matches = scan_file(file_path, compiled_rules)

# Add filename match if applicable
if version_string and version_string in file:
matches.append({
"rule_name": "filename_match",
"line_number": 0,
"matched_string": version_string,
"context_line": f"Filename contains {version_string}"
})
for dep, ver in filename_targets:
if ver and ver in file:
matches.append({
"rule_name": "filename_match",
"line_number": 0,
"matched_string": ver,
"context_line": f"Filename contains {ver}",
"dependency": dep or "",
"version": ver
})

# Compute display path and package name
rel_file_path = os.path.relpath(file_path, root_path)
Expand Down Expand Up @@ -576,6 +593,46 @@ def scan_repository(
return results


def parse_targets(targets_input: str) -> List[Tuple[str, str]]:
"""
Parses a targets input (file path or inline YAML/JSON string) into a list of (dependency, version) tuples.
"""
raw_targets = {}
content = targets_input

# Check if the input is a file path
if os.path.exists(targets_input):
try:
with open(targets_input, 'r', encoding='utf-8') as f:
content = f.read()
except PermissionError:
print(f"Error: Permission denied reading targets file: {targets_input}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(f"Error reading targets file {targets_input}: {e}", file=sys.stderr)
sys.exit(1)

try:
raw_targets = yaml.safe_load(content)
except Exception as e:
print(f"Error parsing targets YAML/JSON content: {e}", file=sys.stderr)
sys.exit(1)

if not isinstance(raw_targets, dict):
print("Error: Targets input must resolve to a JSON object or YAML mapping", file=sys.stderr)
sys.exit(1)

targets = []
for dep, versions in raw_targets.items():
if isinstance(versions, list):
for v in versions:
targets.append((str(dep), str(v)))
else:
targets.append((str(dep), str(versions)))

return targets
Comment on lines +600 to +633

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This implementation has a few issues:

  1. Windows Crash: Calling os.path.exists() on Windows with an inline JSON/YAML string (which contains invalid path characters like {, ", :, [, ]) will raise an OSError and crash the script.
  2. Confusing Error Message: If a user passes a file path that does not exist, os.path.exists() returns False, and the script falls back to parsing the path string as YAML. This succeeds (parsing as a plain string) but then fails the isinstance(raw_targets, dict) check, printing a confusing error about the input not resolving to a JSON object or YAML mapping instead of a "File not found" error.
  3. Invalid Version Types: If a dependency has a null/None version or a nested dictionary/list structure, it is converted directly to a string (e.g., "None" or "{'major': 3}"), leading to incorrect scanning behavior.

We can resolve all of these by checking if the input looks like a file path first, handling non-existent files gracefully, and validating the parsed version types. Avoid broad except-blocks that silently pass; instead, log the exception to aid in debugging.

    raw_targets = {}
    content = targets_input
    
    # Check if the input is a file path
    is_file = False
    looks_like_file = not any(char in targets_input for char in ('{', '\\n', ':')) or targets_input.endswith(('.json', '.yaml', '.yml'))
    if looks_like_file:
        try:
            if os.path.exists(targets_input):
                is_file = True
            else:
                print(f"Error: Targets file not found: {targets_input}", file=sys.stderr)
                sys.exit(1)
        except Exception as e:
            logging.warning(f"Failed to check if path exists: {e}")

    if is_file:
        try:
            with open(targets_input, 'r', encoding='utf-8') as f:
                content = f.read()
        except PermissionError:
            print(f"Error: Permission denied reading targets file: {targets_input}", file=sys.stderr)
            sys.exit(1)
        except Exception as e:
            print(f"Error reading targets file {targets_input}: {e}", file=sys.stderr)
            sys.exit(1)

    try:
        raw_targets = yaml.safe_load(content)
    except Exception as e:
        print(f"Error parsing targets YAML/JSON content: {e}", file=sys.stderr)
        sys.exit(1)
        
    if not isinstance(raw_targets, dict):
        print("Error: Targets input must resolve to a JSON object or YAML mapping", file=sys.stderr)
        sys.exit(1)
        
    targets = []
    for dep, versions in raw_targets.items():
        if isinstance(versions, list):
            for v in versions:
                if v is None or isinstance(v, (dict, list)):
                    print(f"Error: Invalid version '{v}' for dependency '{dep}'", file=sys.stderr)
                    sys.exit(1)
                targets.append((str(dep), str(v)))
        elif versions is not None and not isinstance(versions, dict):
            targets.append((str(dep), str(versions)))
        else:
            print(f"Error: Invalid version '{versions}' for dependency '{dep}'", file=sys.stderr)
            sys.exit(1)
            
    return targets
References
  1. Avoid broad except Exception: blocks that silently return None. Instead, log the exception (e.g., using logger.warning) to aid in debugging and prevent masking underlying issues.



def main():
script_dir = os.path.dirname(os.path.abspath(__file__))
default_config = os.path.join(script_dir, "regex_config.yaml")
Expand All @@ -586,16 +643,19 @@ def main():

parser.add_argument(
"-d", "--dependency",
required=True,
help="Name of the dependency (e.g., python, protobuf)"
)

parser.add_argument(
"-v", "--version",
required=True,
help="Specific version to search for (e.g., 3.7, 4.25.8)"
)

parser.add_argument(
"--targets",
help="Path to a YAML/JSON targets file, or an inline YAML/JSON string (e.g. 'python: [3.8, 3.9]')"
)

parser.add_argument(
"-p", "--path",
default=".",
Expand Down Expand Up @@ -659,6 +719,25 @@ def main():

args = parser.parse_args()

# Validation of required inputs
has_single_target = bool(args.dependency and args.version)
has_targets_list = bool(args.targets)

if not (has_single_target or has_targets_list):
parser.error("Must specify either (-d/--dependency AND -v/--version) OR (--targets)")
if has_single_target and has_targets_list:
parser.error("Cannot specify both single target (-d/-v) and targets list (--targets)")

targets = []
if has_targets_list:
targets = parse_targets(args.targets)
else:
targets = [(args.dependency, args.version)]

if not targets:
print("Error: No targets resolved to scan.", file=sys.stderr)
sys.exit(1)

# Resolve target packages if filtering is requested
target_packages = []
if args.package:
Expand All @@ -670,7 +749,12 @@ def main():
elif args.package_file:
target_packages = read_package_file(args.package_file)

print(f"Starting scan for dependency: {args.dependency} version: {args.version}")
if has_targets_list:
print("Starting scan for multiple targets:")
for dep, ver in targets:
print(f" - {dep}: {ver}")
else:
print(f"Starting scan for dependency: {args.dependency} version: {args.version}")
print(f"Root path: {args.path}")
print("Targets to scan:")
if target_packages:
Expand All @@ -681,8 +765,10 @@ def main():
print(f"Using config: {args.config}")

# Load and resolve rules
config_manager = ConfigManager(args.config, args.dependency, args.version)
rules = config_manager.load_config()
rules = []
for dep, ver in targets:
config_manager = ConfigManager(args.config, dep, ver)
rules.extend(config_manager.load_config())



Expand All @@ -695,7 +781,14 @@ def main():
print(f"Loaded {len(ignore_dirs)} ignore patterns from {ignore_file_path}")

# Scan repository
all_matches = scan_repository(args.path, rules, target_packages, ignore_dirs, version_string=args.version)
all_matches = scan_repository(
args.path,
rules,
target_packages,
ignore_dirs,
version_string=(None if has_targets_list else args.version),
targets=targets
)

print(f"\nFound {len(all_matches)} matches.")
display_matches = all_matches if args.stdout else all_matches[:10]
Expand All @@ -717,7 +810,14 @@ def main():
script_dir = os.path.dirname(os.path.abspath(__file__))
results_dir = os.path.join(script_dir, "results")
os.makedirs(results_dir, exist_ok=True)
output_path = os.path.join(results_dir, f"{args.dependency}-{args.version}-{timestamp}.csv")
if has_targets_list:
if os.path.exists(args.targets):
base_name = os.path.splitext(os.path.basename(args.targets))[0]
else:
base_name = "targets"
output_path = os.path.join(results_dir, f"{base_name}-{timestamp}.csv")
Comment on lines +813 to +818

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Calling os.path.exists(args.targets) when args.targets is an inline JSON/YAML string can raise an OSError on Windows due to invalid path characters (such as {, ", [). This will crash the script at the very end of its execution. Wrapping this check in a try-except block ensures it fails gracefully and defaults to "targets" as the base name. Be sure to log the exception rather than letting it pass silently.

Suggested change
if has_targets_list:
if os.path.exists(args.targets):
base_name = os.path.splitext(os.path.basename(args.targets))[0]
else:
base_name = "targets"
output_path = os.path.join(results_dir, f"{base_name}-{timestamp}.csv")
if has_targets_list:
base_name = "targets"
try:
if os.path.exists(args.targets):
base_name = os.path.splitext(os.path.basename(args.targets))[0]
except Exception as e:
logging.warning(f"Failed to check if targets path exists: {e}")
output_path = os.path.join(results_dir, f"{base_name}-{timestamp}.csv")
References
  1. Avoid broad except Exception: blocks that silently return None. Instead, log the exception (e.g., using logger.warning) to aid in debugging and prevent masking underlying issues.

else:
output_path = os.path.join(results_dir, f"{args.dependency}-{args.version}-{timestamp}.csv")

write_csv_report(output_path, all_matches)

Expand Down
Loading