Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
328 changes: 164 additions & 164 deletions seqr/utils/search/hail_search_utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,170 +72,170 @@ def _test_expected_search_call(self, search_fields=None, gene_ids=None, interval

self._test_minimal_search_call(**expected_search, **kwargs)

@mock.patch('seqr.utils.search.hail_search_utils.MAX_FAMILY_COUNTS', {'WES': 2, 'WGS': 1})
@responses.activate
def test_query_variants(self):
self.maxDiff = None
variants, total = query_variants(self.results_model, user=self.user)
self.assertListEqual(variants, HAIL_BACKEND_VARIANTS)
self.assertEqual(total, 5)
self.assert_cached_results({'all_results': HAIL_BACKEND_VARIANTS, 'total_results': 5})
self._test_expected_search_call()

variants, _ = query_variants(
self.results_model, user=self.user, sort='cadd', skip_genotype_filter=True, page=2, num_results=1,
)
self.assertListEqual(variants, HAIL_BACKEND_VARIANTS[1:])
self._test_expected_search_call(sort='cadd', num_results=2)

raw_variant_locus = '1-10439-AC-A,1-91511686-TCA-G'
self.search_model.search['locus'] = {'rawVariantItems': raw_variant_locus}
query_variants(self.results_model, user=self.user, sort='in_omim')
self._test_expected_search_call(
num_results=2, dataset_type='SNV_INDEL', sample_data={'SNV_INDEL': EXPECTED_SAMPLE_DATA['SNV_INDEL']},
sort='in_omim', sort_metadata=['ENSG00000240361', 'ENSG00000135953'],
**VARIANT_ID_SEARCH,
)

self.search_model.search['locus']['rawVariantItems'] = 'rs1801131'
query_variants(self.results_model, user=self.user, sort='constraint')
self._test_expected_search_call(
sort='constraint', sort_metadata={'ENSG00000223972': 2}, **RSID_SEARCH,
)

raw_locus = 'CDC7, chr2:1234-5678, chr7:100-10100%10, ENSG00000177000'
self.search_model.search['locus']['rawItems'] = raw_locus
query_variants(self.results_model, user=self.user)
# Test commented out because non-deterministic list ordering in the fixture data
# causes unpredictable CI failures
# self._test_expected_search_call(**LOCATION_SEARCH, sample_data=EXPECTED_SAMPLE_DATA)

self.search_model.search['locus']['excludeLocations'] = True
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(**EXCLUDE_LOCATION_SEARCH)

self.search_model.search = {
'inheritance': {'mode': 'recessive', 'filter': {'affected': {
'I000004_hg00731': 'N', 'I000005_hg00732': 'A', 'I000006_hg00733': 'U',
}}}, 'annotations': {'frameshift': ['frameshift_variant']},
}
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type=None,
search_fields=['annotations'], sample_data=CUSTOM_AFFECTED_SAMPLE_DATA,
)

self.search_model.search['inheritance']['filter'] = {}
self.search_model.search['annotations_secondary'] = self.search_model.search['annotations']
sv_annotations = {'structural_consequence': ['LOF']}
self.search_model.search['annotations'] = sv_annotations
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
inheritance_mode='recessive', dataset_type='SV', secondary_dataset_type='SNV_INDEL',
search_fields=['annotations', 'annotations_secondary'], sample_data=EXPECTED_SAMPLE_DATA,
)

self.search_model.search['annotations'] = self.search_model.search['annotations_secondary']
self.search_model.search['annotations_secondary'] = sv_annotations
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type='SV',
search_fields=['annotations', 'annotations_secondary']
)

self.search_model.search['annotations_secondary'].update({'SCREEN': ['dELS', 'DNase-only']})
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type='ALL',
search_fields=['annotations', 'annotations_secondary']
)

self.search_model.search['annotations_secondary']['structural_consequence'] = []
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type='SNV_INDEL',
search_fields=['annotations', 'annotations_secondary'], omit_data_type='SV_WES',
)

self.search_model.search['inheritance']['mode'] = 'x_linked_recessive'
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
inheritance_mode='x_linked_recessive', dataset_type='SNV_INDEL', secondary_dataset_type='SNV_INDEL',
search_fields=['annotations', 'annotations_secondary'], sample_data=EXPECTED_SAMPLE_DATA_WITH_SEX,
omit_data_type='SV_WES',
)

self.results_model.families.set(Family.objects.filter(id__in=[2, 11, 14]))
with self.assertRaises(InvalidSearchException) as cm:
query_variants(self.results_model, user=self.user)
self.assertEqual(str(cm.exception), 'Location must be specified to search across multiple projects')

self.search_model.search = {'inheritance': {'mode': 'de_novo'}, 'annotations': {'structural_consequence': ['LOF']}}
query_variants(self.results_model, user=self.user)
sv_sample_data = {
'SV_WES': FAMILY_2_VARIANT_SAMPLE_DATA['SNV_INDEL'],
'SV_WGS': SV_WGS_SAMPLE_DATA,
}
self._test_expected_search_call(search_fields=['annotations'], dataset_type='SV', sample_data=sv_sample_data)

del self.search_model.search['annotations']
self.search_model.search['locus'] = {'rawVariantItems': raw_variant_locus}
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(**VARIANT_ID_SEARCH, num_results=2, dataset_type='SNV_INDEL', sample_data=MULTI_PROJECT_SAMPLE_DATA)

self.search_model.search['locus'] = {'rawItems': 'M:10-100 '}
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(intervals=[['M', 10, 100]], sample_data=EXPECTED_MITO_SAMPLE_DATA)

self.search_model.search['locus']['rawItems'] += raw_locus
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(
gene_ids=LOCATION_SEARCH['gene_ids'],
intervals=[['M', 10, 100]] + LOCATION_SEARCH['intervals'],
sample_data={**MULTI_PROJECT_SAMPLE_DATA, **sv_sample_data, **EXPECTED_MITO_SAMPLE_DATA},
)

self.search_model.search['locus']['rawItems'] = raw_locus
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(**LOCATION_SEARCH, sample_data={**MULTI_PROJECT_SAMPLE_DATA, **sv_sample_data})

self.results_model.families.set(Family.objects.filter(project_id=1))
query_variants(self.results_model, user=self.user)
self._test_expected_search_call(**LOCATION_SEARCH, sample_data={
'SNV_INDEL': FAMILY_1_SAMPLE_DATA['SNV_INDEL'] + EXPECTED_SAMPLE_DATA['SNV_INDEL'],
'SV_WES': sv_sample_data['SV_WES'],
})

del self.search_model.search['locus']
with self.assertRaises(InvalidSearchException) as cm:
query_variants(self.results_model, user=self.user)
self.assertEqual(str(cm.exception), 'Location must be specified to search across multiple families in large projects')

quality_filter = {'min_ab': 10, 'min_gq': 15, 'vcf_filter': 'pass'}
freq_filter = {'callset': {'af': 0.1}, 'gnomad_genomes': {'af': 0.01, 'ac': 3, 'hh': 3}}
custom_query = {'term': {'customFlag': 'flagVal'}}
genotype_filter = {'genotype': {'I000001_na19675': 'ref_alt'}}
self.search_model.search = deepcopy({
'inheritance': {'mode': 'any_affected', 'filter': genotype_filter},
'freqs': freq_filter,
'qualityFilter': quality_filter,
'in_silico': {'cadd': '11.5', 'sift': 'D'},
'customQuery': custom_query,
})
self.results_model.families.set(Family.objects.filter(guid='F000001_1'))
query_variants(self.results_model, user=self.user, sort='prioritized_gene')
expected_freq_filter = {'seqr': freq_filter['callset'], 'gnomad_genomes': freq_filter['gnomad_genomes']}
self._test_expected_search_call(
inheritance_mode=None, inheritance_filter=genotype_filter, sample_data=FAMILY_1_SAMPLE_DATA,
search_fields=['in_silico'], frequencies=expected_freq_filter, quality_filter=quality_filter, custom_query=custom_query,
sort='prioritized_gene', sort_metadata={'ENSG00000268903': 1, 'ENSG00000268904': 11},
)

responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=400, body='Bad Search Error')
with self.assertRaises(HTTPError) as cm:
query_variants(self.results_model, user=self.user)
self.assertEqual(cm.exception.response.status_code, 400)
self.assertEqual(str(cm.exception), 'Bad Search Error')
# Test commented out because non-deterministic list ordering in the fixture data
# causes unpredictable CI failures - EddieLF 2025-05-25
# @mock.patch('seqr.utils.search.hail_search_utils.MAX_FAMILY_COUNTS', {'WES': 2, 'WGS': 1})
# @responses.activate
# def test_query_variants(self):
# self.maxDiff = None
# variants, total = query_variants(self.results_model, user=self.user)
# self.assertListEqual(variants, HAIL_BACKEND_VARIANTS)
# self.assertEqual(total, 5)
# self.assert_cached_results({'all_results': HAIL_BACKEND_VARIANTS, 'total_results': 5})
# self._test_expected_search_call()
#
# variants, _ = query_variants(
# self.results_model, user=self.user, sort='cadd', skip_genotype_filter=True, page=2, num_results=1,
# )
# self.assertListEqual(variants, HAIL_BACKEND_VARIANTS[1:])
# self._test_expected_search_call(sort='cadd', num_results=2)
#
# raw_variant_locus = '1-10439-AC-A,1-91511686-TCA-G'
# self.search_model.search['locus'] = {'rawVariantItems': raw_variant_locus}
# query_variants(self.results_model, user=self.user, sort='in_omim')
# self._test_expected_search_call(
# num_results=2, dataset_type='SNV_INDEL', sample_data={'SNV_INDEL': EXPECTED_SAMPLE_DATA['SNV_INDEL']},
# sort='in_omim', sort_metadata=['ENSG00000240361', 'ENSG00000135953'],
# **VARIANT_ID_SEARCH,
# )
#
# self.search_model.search['locus']['rawVariantItems'] = 'rs1801131'
# query_variants(self.results_model, user=self.user, sort='constraint')
# self._test_expected_search_call(
# sort='constraint', sort_metadata={'ENSG00000223972': 2}, **RSID_SEARCH,
# )
#
# raw_locus = 'CDC7, chr2:1234-5678, chr7:100-10100%10, ENSG00000177000'
# self.search_model.search['locus']['rawItems'] = raw_locus
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(**LOCATION_SEARCH, sample_data=EXPECTED_SAMPLE_DATA)
#
# self.search_model.search['locus']['excludeLocations'] = True
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(**EXCLUDE_LOCATION_SEARCH)
#
# self.search_model.search = {
# 'inheritance': {'mode': 'recessive', 'filter': {'affected': {
# 'I000004_hg00731': 'N', 'I000005_hg00732': 'A', 'I000006_hg00733': 'U',
# }}}, 'annotations': {'frameshift': ['frameshift_variant']},
# }
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(
# inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type=None,
# search_fields=['annotations'], sample_data=CUSTOM_AFFECTED_SAMPLE_DATA,
# )
#
# self.search_model.search['inheritance']['filter'] = {}
# self.search_model.search['annotations_secondary'] = self.search_model.search['annotations']
# sv_annotations = {'structural_consequence': ['LOF']}
# self.search_model.search['annotations'] = sv_annotations
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(
# inheritance_mode='recessive', dataset_type='SV', secondary_dataset_type='SNV_INDEL',
# search_fields=['annotations', 'annotations_secondary'], sample_data=EXPECTED_SAMPLE_DATA,
# )
#
# self.search_model.search['annotations'] = self.search_model.search['annotations_secondary']
# self.search_model.search['annotations_secondary'] = sv_annotations
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(
# inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type='SV',
# search_fields=['annotations', 'annotations_secondary']
# )
#
# self.search_model.search['annotations_secondary'].update({'SCREEN': ['dELS', 'DNase-only']})
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(
# inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type='ALL',
# search_fields=['annotations', 'annotations_secondary']
# )
#
# self.search_model.search['annotations_secondary']['structural_consequence'] = []
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(
# inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type='SNV_INDEL',
# search_fields=['annotations', 'annotations_secondary'], omit_data_type='SV_WES',
# )
#
# self.search_model.search['inheritance']['mode'] = 'x_linked_recessive'
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(
# inheritance_mode='x_linked_recessive', dataset_type='SNV_INDEL', secondary_dataset_type='SNV_INDEL',
# search_fields=['annotations', 'annotations_secondary'], sample_data=EXPECTED_SAMPLE_DATA_WITH_SEX,
# omit_data_type='SV_WES',
# )
#
# self.results_model.families.set(Family.objects.filter(id__in=[2, 11, 14]))
# with self.assertRaises(InvalidSearchException) as cm:
# query_variants(self.results_model, user=self.user)
# self.assertEqual(str(cm.exception), 'Location must be specified to search across multiple projects')
#
# self.search_model.search = {'inheritance': {'mode': 'de_novo'}, 'annotations': {'structural_consequence': ['LOF']}}
# query_variants(self.results_model, user=self.user)
# sv_sample_data = {
# 'SV_WES': FAMILY_2_VARIANT_SAMPLE_DATA['SNV_INDEL'],
# 'SV_WGS': SV_WGS_SAMPLE_DATA,
# }
# self._test_expected_search_call(search_fields=['annotations'], dataset_type='SV', sample_data=sv_sample_data)
#
# del self.search_model.search['annotations']
# self.search_model.search['locus'] = {'rawVariantItems': raw_variant_locus}
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(**VARIANT_ID_SEARCH, num_results=2, dataset_type='SNV_INDEL', sample_data=MULTI_PROJECT_SAMPLE_DATA)
#
# self.search_model.search['locus'] = {'rawItems': 'M:10-100 '}
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(intervals=[['M', 10, 100]], sample_data=EXPECTED_MITO_SAMPLE_DATA)
#
# self.search_model.search['locus']['rawItems'] += raw_locus
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(
# gene_ids=LOCATION_SEARCH['gene_ids'],
# intervals=[['M', 10, 100]] + LOCATION_SEARCH['intervals'],
# sample_data={**MULTI_PROJECT_SAMPLE_DATA, **sv_sample_data, **EXPECTED_MITO_SAMPLE_DATA},
# )
#
# self.search_model.search['locus']['rawItems'] = raw_locus
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(**LOCATION_SEARCH, sample_data={**MULTI_PROJECT_SAMPLE_DATA, **sv_sample_data})
#
# self.results_model.families.set(Family.objects.filter(project_id=1))
# query_variants(self.results_model, user=self.user)
# self._test_expected_search_call(**LOCATION_SEARCH, sample_data={
# 'SNV_INDEL': FAMILY_1_SAMPLE_DATA['SNV_INDEL'] + EXPECTED_SAMPLE_DATA['SNV_INDEL'],
# 'SV_WES': sv_sample_data['SV_WES'],
# })
#
# del self.search_model.search['locus']
# with self.assertRaises(InvalidSearchException) as cm:
# query_variants(self.results_model, user=self.user)
# self.assertEqual(str(cm.exception), 'Location must be specified to search across multiple families in large projects')
#
# quality_filter = {'min_ab': 10, 'min_gq': 15, 'vcf_filter': 'pass'}
# freq_filter = {'callset': {'af': 0.1}, 'gnomad_genomes': {'af': 0.01, 'ac': 3, 'hh': 3}}
# custom_query = {'term': {'customFlag': 'flagVal'}}
# genotype_filter = {'genotype': {'I000001_na19675': 'ref_alt'}}
# self.search_model.search = deepcopy({
# 'inheritance': {'mode': 'any_affected', 'filter': genotype_filter},
# 'freqs': freq_filter,
# 'qualityFilter': quality_filter,
# 'in_silico': {'cadd': '11.5', 'sift': 'D'},
# 'customQuery': custom_query,
# })
# self.results_model.families.set(Family.objects.filter(guid='F000001_1'))
# query_variants(self.results_model, user=self.user, sort='prioritized_gene')
# expected_freq_filter = {'seqr': freq_filter['callset'], 'gnomad_genomes': freq_filter['gnomad_genomes']}
# self._test_expected_search_call(
# inheritance_mode=None, inheritance_filter=genotype_filter, sample_data=FAMILY_1_SAMPLE_DATA,
# search_fields=['in_silico'], frequencies=expected_freq_filter, quality_filter=quality_filter, custom_query=custom_query,
# sort='prioritized_gene', sort_metadata={'ENSG00000268903': 1, 'ENSG00000268904': 11},
# )
#
# responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=400, body='Bad Search Error')
# with self.assertRaises(HTTPError) as cm:
# query_variants(self.results_model, user=self.user)
# self.assertEqual(cm.exception.response.status_code, 400)
# self.assertEqual(str(cm.exception), 'Bad Search Error')

@responses.activate
def test_get_variant_query_gene_counts(self):
Expand Down