Skip to content

Commit 06e0b14

Browse files
committed
feat(import-languages): optimize language import process with bulk creation and improved validation
1 parent db8b6c6 commit 06e0b14

2 files changed

Lines changed: 119 additions & 38 deletions

File tree

dojo/api_v2/serializers.py

Lines changed: 29 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -2856,30 +2856,36 @@ def save(self):
28562856
msg = "Invalid format"
28572857
raise Exception(msg)
28582858

2859+
# Filter out ignored keys
2860+
language_names = [name for name in deserialized if name not in {"header", "SUM"}]
2861+
# Prepopulate existing Language_Type objects
2862+
existing_types = {
2863+
lt.language: lt
2864+
for lt in Language_Type.objects.filter(language__in=language_names)
2865+
}
2866+
# Determine which Language_Type objects need to be created
2867+
new_language_names = [name for name in language_names if name not in existing_types]
2868+
new_types = [Language_Type(language=name) for name in new_language_names]
2869+
Language_Type.objects.bulk_create(new_types)
2870+
# Add newly created Language_Type objects to cache
2871+
for lt in Language_Type.objects.filter(language__in=new_language_names):
2872+
existing_types[lt.language] = lt
2873+
# Delete all Languages for this product
28592874
Languages.objects.filter(product=product).delete()
2860-
2861-
for name in deserialized:
2862-
if name not in {"header", "SUM"}:
2863-
element = deserialized[name]
2864-
2865-
try:
2866-
(
2867-
language_type,
2868-
_created,
2869-
) = Language_Type.objects.get_or_create(language=name)
2870-
except Language_Type.MultipleObjectsReturned:
2871-
language_type = Language_Type.objects.filter(
2872-
language=name,
2873-
).first()
2874-
2875-
language = Languages()
2876-
language.product = product
2877-
language.language = language_type
2878-
language.files = element.get("nFiles", 0)
2879-
language.blank = element.get("blank", 0)
2880-
language.comment = element.get("comment", 0)
2881-
language.code = element.get("code", 0)
2882-
language.save()
2875+
# Prepare Languages objects for bulk insert
2876+
languages_to_create = [
2877+
Languages(
2878+
product=product,
2879+
language=existing_types[name],
2880+
files=deserialized[name].get("nFiles", 0),
2881+
blank=deserialized[name].get("blank", 0),
2882+
comment=deserialized[name].get("comment", 0),
2883+
code=deserialized[name].get("code", 0),
2884+
)
2885+
for name in language_names
2886+
]
2887+
# Bulk insert all Languages in one query
2888+
Languages.objects.bulk_create(languages_to_create)
28832889

28842890
def validate(self, data):
28852891
if is_scan_file_too_large(data["file"]):

unittests/test_rest_framework.py

Lines changed: 90 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3788,26 +3788,101 @@ def __init__(self, *args, **kwargs):
37883788
def __del__(self: object):
37893789
self.payload["file"].close()
37903790

3791+
def _build_payload(self, data):
3792+
return {
3793+
"product": 1,
3794+
"file": SimpleUploadedFile(
3795+
"defectdojo_cloc.json",
3796+
json.dumps(data).encode("utf-8"),
3797+
content_type="application/json",
3798+
),
3799+
}
3800+
37913801
def test_create(self):
3792-
BaseClass.CreateRequestTest.test_create(self)
3802+
self.payload["file"].close()
3803+
base_data = json.loads(
3804+
Path("unittests/files/defectdojo_cloc.json").read_text(
3805+
encoding="utf-8",
3806+
),
3807+
)
3808+
updated_data = json.loads(json.dumps(base_data))
3809+
updated_data.pop("JSON", None)
3810+
updated_data["Python"]["code"] = 51057
3811+
updated_data["Go"] = {
3812+
"nFiles": 1,
3813+
"blank": 2,
3814+
"comment": 3,
3815+
"code": 4,
3816+
}
37933817

3794-
languages = Languages.objects.filter(product=1).order_by("language")
3818+
test_cases = [
3819+
(
3820+
"initial",
3821+
base_data,
3822+
{
3823+
"JSON": {
3824+
"files": 21,
3825+
"blank": 7,
3826+
"comment": 0,
3827+
"code": 63996,
3828+
},
3829+
"Python": {
3830+
"files": 432,
3831+
"blank": 10813,
3832+
"comment": 5054,
3833+
"code": 51056,
3834+
},
3835+
},
3836+
),
3837+
(
3838+
"updated",
3839+
updated_data,
3840+
{
3841+
"Go": {
3842+
"files": 1,
3843+
"blank": 2,
3844+
"comment": 3,
3845+
"code": 4,
3846+
},
3847+
"Python": {
3848+
"files": 432,
3849+
"blank": 10813,
3850+
"comment": 5054,
3851+
"code": 51057,
3852+
},
3853+
},
3854+
),
3855+
]
37953856

3796-
self.assertEqual(2, len(languages))
3857+
product = Product.objects.get(id=1)
3858+
for case_name, payload_data, expected in test_cases:
3859+
with self.subTest(case=case_name):
3860+
self.payload = self._build_payload(payload_data)
3861+
response = self.client.post(self.url, self.payload)
3862+
self.assertEqual(201, response.status_code, response.content[:1000])
3863+
self.check_schema_response("post", "201", response)
37973864

3798-
self.assertEqual(languages[0].product, Product.objects.get(id=1))
3799-
self.assertEqual(languages[0].language, Language_Type.objects.get(id=1))
3800-
self.assertEqual(languages[0].files, 21)
3801-
self.assertEqual(languages[0].blank, 7)
3802-
self.assertEqual(languages[0].comment, 0)
3803-
self.assertEqual(languages[0].code, 63996)
3865+
languages = (
3866+
Languages.objects.filter(product=1)
3867+
.select_related("language")
3868+
.order_by("language__language")
3869+
)
3870+
self.assertEqual(len(expected), languages.count())
38043871

3805-
self.assertEqual(languages[1].product, Product.objects.get(id=1))
3806-
self.assertEqual(languages[1].language, Language_Type.objects.get(id=2))
3807-
self.assertEqual(languages[1].files, 432)
3808-
self.assertEqual(languages[1].blank, 10813)
3809-
self.assertEqual(languages[1].comment, 5054)
3810-
self.assertEqual(languages[1].code, 51056)
3872+
languages_by_name = {
3873+
language.language.language: language
3874+
for language in languages
3875+
}
3876+
self.assertEqual(set(expected.keys()), set(languages_by_name.keys()))
3877+
3878+
for name, counts in expected.items():
3879+
language = languages_by_name[name]
3880+
self.assertEqual(product, language.product)
3881+
self.assertEqual(name, language.language.language)
3882+
self.assertEqual(counts["files"], language.files)
3883+
self.assertEqual(counts["blank"], language.blank)
3884+
self.assertEqual(counts["comment"], language.comment)
3885+
self.assertEqual(counts["code"], language.code)
38113886

38123887

38133888
@versioned_fixtures

0 commit comments

Comments
 (0)