From 4cd77ad9f352f4cc565fead00d823a6663ee6ba7 Mon Sep 17 00:00:00 2001
From: Prajakta Kamble
Date: Fri, 26 Jun 2026 08:41:24 +0530
Subject: [PATCH] Fix flag_and_ignore_files_over_max_size inconsistent return
 value and docstring

The function docstring claimed to return the remaining files queryset
within the file size limit, but .update() actually returns an integer
count of updated rows.

The caller in scanpipe/pipes/scancode.py already re-filters manually:

    resource_qs.filter(~Q(status=flag.IGNORED_BY_MAX_FILE_SIZE))

proving the original return value was misleading.

Fix: explicitly return the flagged_count integer and update the
docstring to accurately describe the return value.

Add regression tests to verify the return type and that files are
correctly flagged with IGNORED_BY_MAX_FILE_SIZE status.

Signed-off-by: Prajakta Kamble
---
 scanpipe/pipes/flag.py            |  8 +++--
 scanpipe/tests/pipes/test_flag.py | 51 +++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/scanpipe/pipes/flag.py b/scanpipe/pipes/flag.py
index e8a983d40d..0113f92388 100644
--- a/scanpipe/pipes/flag.py
+++ b/scanpipe/pipes/flag.py
@@ -111,14 +111,18 @@ def flag_ignored_patterns(codebaseresources, patterns, status=IGNORED_PATTERN):
 def flag_and_ignore_files_over_max_size(resource_qs, file_size_limit):
     """
     Flag codebase resources which are over the max file size for scanning
-    and return all other files within the file size limit.
+    and return the count of flagged resources.
+
+    When no ``file_size_limit`` is provided, nothing is flagged and the
+    ``resource_qs`` is returned unchanged.
     """
     if not file_size_limit:
         return resource_qs
 
-    return resource_qs.filter(size__gte=file_size_limit).update(
+    flagged_count = resource_qs.filter(size__gte=file_size_limit).update(
         status=IGNORED_BY_MAX_FILE_SIZE
     )
+    return flagged_count
 
 
 def analyze_scanned_files(project):
diff --git a/scanpipe/tests/pipes/test_flag.py b/scanpipe/tests/pipes/test_flag.py
index c971590dd7..c317a528f0 100644
--- a/scanpipe/tests/pipes/test_flag.py
+++ b/scanpipe/tests/pipes/test_flag.py
@@ -137,3 +137,54 @@ def test_scanpipe_pipes_flag_flag_mapped_resources(self):
         self.resource2.refresh_from_db()
         self.assertEqual("mapped", self.resource1.status)
         self.assertEqual("mapped", self.resource2.status)
+
+    def test_flag_and_ignore_files_over_max_size_returns_count(self):
+        """
+        Regression test: flag_and_ignore_files_over_max_size must return
+        an integer count of flagged files, not a queryset or None.
+
+        Bug: the docstring claimed to return remaining files within limit,
+        but .update() returns an integer count. The docstring and return
+        value were inconsistent, making callers unclear about what they get.
+        """
+        resource_qs = self.project1.codebaseresources
+
+        # Create resources with different sizes
+        resource_qs.create(path="small.py", size=100)
+        resource_qs.create(path="large.py", size=999999)
+        resource_qs.create(path="huge.py", size=9999999)
+
+        all_files = resource_qs.files()
+        result = flag.flag_and_ignore_files_over_max_size(
+            resource_qs=all_files,
+            file_size_limit=500000,
+        )
+
+        # Must return an integer — the count of flagged files
+        self.assertIsInstance(result, int)
+        self.assertEqual(result, 2)  # large.py and huge.py flagged
+
+        # Verify the flagged files actually got the correct status
+        flagged = resource_qs.filter(status=flag.IGNORED_BY_MAX_FILE_SIZE)
+        self.assertEqual(flagged.count(), 2)
+
+        # Small file must NOT be flagged
+        small = resource_qs.get(path="small.py")
+        self.assertNotEqual(small.status, flag.IGNORED_BY_MAX_FILE_SIZE)
+
+    def test_flag_and_ignore_files_over_max_size_no_limit_returns_qs(self):
+        """
+        When file_size_limit is None/0, the function returns the original
+        queryset unchanged — verify this still works correctly.
+        """
+        resource_qs = self.project1.codebaseresources
+        resource_qs.create(path="any_file.py", size=999999)
+
+        all_files = resource_qs.files()
+        result = flag.flag_and_ignore_files_over_max_size(
+            resource_qs=all_files,
+            file_size_limit=None,
+        )
+
+        # No limit = returns original queryset untouched
+        self.assertEqual(result, all_files)