From 47eeb740655718ee7e7d996023eef9dcd8d01ce5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 8 Apr 2024 16:54:38 -0400 Subject: [PATCH 001/736] create DynamicAnalysisGroup model --- seqr/migrations/0063_dynamicanalysisgroup.py | 33 ++++++++++++++++++++ seqr/models.py | 17 ++++++++++ 2 files changed, 50 insertions(+) create mode 100644 seqr/migrations/0063_dynamicanalysisgroup.py diff --git a/seqr/migrations/0063_dynamicanalysisgroup.py b/seqr/migrations/0063_dynamicanalysisgroup.py new file mode 100644 index 0000000000..510eef1719 --- /dev/null +++ b/seqr/migrations/0063_dynamicanalysisgroup.py @@ -0,0 +1,33 @@ +# Generated by Django 3.2.23 on 2024-04-08 20:54 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('seqr', '0062_individual_solve_status'), + ] + + operations = [ + migrations.CreateModel( + name='DynamicAnalysisGroup', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('guid', models.CharField(db_index=True, max_length=30, unique=True)), + ('created_date', models.DateTimeField(db_index=True, default=django.utils.timezone.now)), + ('last_modified_date', models.DateTimeField(blank=True, db_index=True, null=True)), + ('name', models.TextField()), + ('criteria', models.JSONField()), + ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('project', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='seqr.project')), + ], + options={ + 'unique_together': {('project', 'name')}, + }, + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 8410f4236f..f6b78b3957 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1040,6 +1040,23 @@ class 
Meta: json_fields = ['guid', 'name', 'description'] +class DynamicAnalysisGroup(ModelWithGUID): + project = models.ForeignKey('Project', on_delete=models.CASCADE, null=True, blank=True) + name = models.TextField() + criteria = JSONField() + + def __unicode__(self): + return self.name.strip() + + def _compute_guid(self): + return 'AG%07d_%s' % (self.id, _slugify(str(self))) + + class Meta: + unique_together = ('project', 'name') + + json_fields = ['guid', 'name', 'criteria'] + + class VariantSearch(ModelWithGUID): name = models.CharField(max_length=200, null=True) order = models.FloatField(null=True, blank=True) From 87d00013231df043e62da80bb50612fb23c91689 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 8 Apr 2024 17:18:59 -0400 Subject: [PATCH 002/736] todo --- seqr/views/apis/analysis_group_api.py | 2 ++ seqr/views/utils/project_context_utils.py | 1 + 2 files changed, 3 insertions(+) diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index a2014272c5..751680531e 100644 --- a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -60,3 +60,5 @@ def delete_analysis_group_handler(request, project_guid, analysis_group_guid): AnalysisGroup.objects.get(guid=analysis_group_guid, project=project).delete_model(request.user, user_can_delete=True) return create_json_response({'analysisGroupsByGuid': {analysis_group_guid: None}}) + +# TODO add dynamic group endpoints \ No newline at end of file diff --git a/seqr/views/utils/project_context_utils.py b/seqr/views/utils/project_context_utils.py index f774e66a7f..96b65a9e08 100644 --- a/seqr/views/utils/project_context_utils.py +++ b/seqr/views/utils/project_context_utils.py @@ -41,6 +41,7 @@ def get_projects_child_entities(projects, project_guid, user): def get_project_analysis_groups(projects, project_guid): + # TODO also needs to include dynamic groups, must include familyGuids analysis_group_models = AnalysisGroup.objects.filter(project__in=projects) 
analysis_groups = get_json_for_analysis_groups( analysis_group_models, project_guid=project_guid, skip_nested=True, is_analyst=False) From 0243ae966b8a2aa144474a2262fba7134d782763 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 10 Apr 2024 15:48:43 -0400 Subject: [PATCH 003/736] add create dynamic group ui --- .../components/AnalysisGroupButtons.jsx | 38 +++++++++++++++---- ui/pages/Project/components/ProjectPageUI.jsx | 7 +++- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/ui/pages/Project/components/AnalysisGroupButtons.jsx b/ui/pages/Project/components/AnalysisGroupButtons.jsx index a4cf7e52d5..e1b2fb9cda 100644 --- a/ui/pages/Project/components/AnalysisGroupButtons.jsx +++ b/ui/pages/Project/components/AnalysisGroupButtons.jsx @@ -6,6 +6,7 @@ import { connect } from 'react-redux' import UpdateButton from 'shared/components/buttons/UpdateButton' import DeleteButton from 'shared/components/buttons/DeleteButton' +import { Multiselect } from 'shared/components/form/Inputs' import FileUploadField from 'shared/components/form/XHRUploaderField' import PedigreeImagePanel from 'shared/components/panel/view-pedigree-image/PedigreeImagePanel' import { SelectableTableFormInput } from 'shared/components/table/DataTable' @@ -13,8 +14,13 @@ import { FAMILY_DISPLAY_NAME, FAMILY_FIELD_PEDIGREE, FAMILY_FIELD_DESCRIPTION, + FAMILY_FIELD_ANALYSIS_STATUS, + FAMILY_FIELD_ANALYSED_BY, + FAMILY_FIELD_FIRST_SAMPLE, + FAMILY_FIELD_NAME_LOOKUP, } from 'shared/utils/constants' +import { CATEGORY_FAMILY_FILTERS } from '../constants' import { updateAnalysisGroup } from '../reducers' import { getProjectFamiliesByGuid, getCurrentProject } from '../selectors' @@ -75,9 +81,13 @@ const mapTableInputStateToProps = state => ({ data: Object.values(getProjectFamiliesByGuid(state)), }) -const FORM_FIELDS = [ +const BASE_FORM_FIELDS = [ { name: 'name', label: 'Name', validate: value => (value ? 
undefined : 'Name is required') }, { name: 'description', label: 'Description' }, +] + +const FORM_FIELDS = [ + ...BASE_FORM_FIELDS, { name: UPLOADED_FAMILIES_FIELD, key: 'familyUpload', @@ -96,6 +106,18 @@ const FORM_FIELDS = [ }, ] +const DYNAMIC_FORM_FIELDS = [ + ...BASE_FORM_FIELDS, + ...[FAMILY_FIELD_ANALYSIS_STATUS, FAMILY_FIELD_ANALYSED_BY, FAMILY_FIELD_FIRST_SAMPLE].map(category => ({ + name: `criteria.${category}`, + label: `Criteria: ${FAMILY_FIELD_NAME_LOOKUP[category]}`, + component: Multiselect, + options: CATEGORY_FAMILY_FILTERS[category], + includeCategories: true, + color: 'blue', + })), +] + const DECORATORS = [ createDecorator({ field: UPLOADED_FAMILIES_FIELD, @@ -107,25 +129,26 @@ const DECORATORS = [ }), ] -export const UpdateAnalysisGroup = React.memo(({ project, analysisGroup, onSubmit, iconOnly }) => { +export const UpdateAnalysisGroup = React.memo(({ project, analysisGroup, onSubmit, iconOnly, isDynamic }) => { if (!project.canEdit) { return null } - const title = `${analysisGroup ? 'Edit' : 'Create New'} Analysis Group` + const title = `${analysisGroup ? 'Edit' : 'Create New'} ${isDynamic ? 'Dynamic ' : ''}Analysis Group` + const entityName = `${isDynamic ? 
'Dynamic' : ''}AnalysisGroup` return ( ) }) @@ -134,6 +157,7 @@ UpdateAnalysisGroup.propTypes = { project: PropTypes.object, analysisGroup: PropTypes.object, iconOnly: PropTypes.bool, + isDynamic: PropTypes.bool, onSubmit: PropTypes.func, } diff --git a/ui/pages/Project/components/ProjectPageUI.jsx b/ui/pages/Project/components/ProjectPageUI.jsx index ce712401e3..c594739a32 100644 --- a/ui/pages/Project/components/ProjectPageUI.jsx +++ b/ui/pages/Project/components/ProjectPageUI.jsx @@ -26,6 +26,11 @@ import { GeneLists, AddGeneListsButton } from './GeneLists' import FamilyTable from './FamilyTable/FamilyTable' import VariantTags from './VariantTags' +const CreateAnalysisGroupButtons = () => ([ + , + , +]) + const ProjectSectionComponent = React.memo(( { loading, label, children, editButton, linkPath, linkText, project, collaboratorEdit }, ) => ([ @@ -79,7 +84,7 @@ const ProjectPageUI = React.memo(({ analysisGroupGuid, load, loading, familiesLo {analysisGroupGuid ? null : ( - }> + }> )} From 516ee81d2419b4f6e20a2d4d9845d2a4a43f8484 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 10 Apr 2024 16:03:17 -0400 Subject: [PATCH 004/736] validation and dynamic specific endpoint --- ui/pages/Project/components/AnalysisGroupButtons.jsx | 3 ++- ui/pages/Project/reducers.js | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ui/pages/Project/components/AnalysisGroupButtons.jsx b/ui/pages/Project/components/AnalysisGroupButtons.jsx index e1b2fb9cda..4a531bafaf 100644 --- a/ui/pages/Project/components/AnalysisGroupButtons.jsx +++ b/ui/pages/Project/components/AnalysisGroupButtons.jsx @@ -108,13 +108,14 @@ const FORM_FIELDS = [ const DYNAMIC_FORM_FIELDS = [ ...BASE_FORM_FIELDS, - ...[FAMILY_FIELD_ANALYSIS_STATUS, FAMILY_FIELD_ANALYSED_BY, FAMILY_FIELD_FIRST_SAMPLE].map(category => ({ + ...[FAMILY_FIELD_ANALYSIS_STATUS, FAMILY_FIELD_ANALYSED_BY, FAMILY_FIELD_FIRST_SAMPLE].map((category, i) => ({ name: `criteria.${category}`, label: `Criteria: 
${FAMILY_FIELD_NAME_LOOKUP[category]}`, component: Multiselect, options: CATEGORY_FAMILY_FILTERS[category], includeCategories: true, color: 'blue', + validate: i === 0 ? (value, allValues) => (allValues.criteria ? undefined : 'At least one criteria is required') : null, })), ] diff --git a/ui/pages/Project/reducers.js b/ui/pages/Project/reducers.js index bad50d36ea..52785cb6f7 100644 --- a/ui/pages/Project/reducers.js +++ b/ui/pages/Project/reducers.js @@ -242,7 +242,7 @@ export const updateCollaboratorGroup = values => updateEntity( ) export const updateAnalysisGroup = values => updateEntity( - values, RECEIVE_DATA, null, 'analysisGroupGuid', null, state => `/api/project/${state.currentProjectGuid}/analysis_groups`, + values, RECEIVE_DATA, null, 'analysisGroupGuid', null, state => `/api/project/${state.currentProjectGuid}/${values.criteria ? 'dynamic_' : ''}analysis_groups`, ) export const getMmeMatches = submissionGuid => (dispatch, getState) => { From a4e71a872bb772560d6ec4bf4f8f8fe4ff33050c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 10 Apr 2024 16:45:24 -0400 Subject: [PATCH 005/736] dyanmic analysis group endpoints --- seqr/urls.py | 6 ++- seqr/views/apis/analysis_group_api.py | 73 +++++++++++++++++++-------- 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/seqr/urls.py b/seqr/urls.py index 3df8c22acd..8882ca41ba 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -134,7 +134,8 @@ from seqr.views.apis.auth_api import login_required_error, login_view, logout_view, policies_required_error from seqr.views.apis.igv_api import fetch_igv_track, receive_igv_table_handler, update_individual_igv_sample, \ igv_genomes_proxy, receive_bulk_igv_table_handler -from seqr.views.apis.analysis_group_api import update_analysis_group_handler, delete_analysis_group_handler +from seqr.views.apis.analysis_group_api import update_analysis_group_handler, delete_analysis_group_handler, \ + update_dynamic_analysis_group_handler, delete_dynamic_analysis_group_handler 
from seqr.views.apis.project_api import create_project_handler, update_project_handler, delete_project_handler, \ project_page_data, project_families, project_overview, project_mme_submisssions, project_individuals, \ project_analysis_groups, update_project_workspace, project_family_notes, project_collaborators, project_locus_lists, \ @@ -240,6 +241,9 @@ 'project/(?P[^/]+)/analysis_groups/create': update_analysis_group_handler, 'project/(?P[^/]+)/analysis_groups/(?P[^/]+)/update': update_analysis_group_handler, 'project/(?P[^/]+)/analysis_groups/(?P[^/]+)/delete': delete_analysis_group_handler, + 'project/(?P[^/]+)/dynamic_analysis_groups/create': update_dynamic_analysis_group_handler, + 'project/(?P[^/]+)/dynamic_analysis_groups/(?P[^/]+)/update': update_dynamic_analysis_group_handler, + 'project/(?P[^/]+)/dynamic_analysis_groups/(?P[^/]+)/delete': delete_dynamic_analysis_group_handler, 'project/(?P[^/]+)/update_saved_variant_json': update_saved_variant_json, 'project/(?P[^/]+)/add_workspace_data': add_workspace_data, diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index 751680531e..79bea55563 100644 --- a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -1,6 +1,7 @@ +from django.core.exceptions import PermissionDenied import json -from seqr.models import AnalysisGroup, Family +from seqr.models import AnalysisGroup, DynamicAnalysisGroup, Family from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.json_to_orm_utils import update_model_from_json, get_or_create_model_from_json from seqr.views.utils.orm_to_json_utils import get_json_for_analysis_group @@ -10,34 +11,30 @@ REQUIRED_FIELDS = {'name': 'Name', 'familyGuids': 'Families'} -@login_and_policies_required -def update_analysis_group_handler(request, project_guid, analysis_group_guid=None): +def _update_analysis_group(request, project_guid, analysis_group_guid, model_cls, required_fields, + 
validate_body=lambda x: None, post_process_model=lambda x: None): project = get_project_and_check_permissions(project_guid, request.user, can_edit=True) request_json = json.loads(request.body) - missing_fields = [field for field in REQUIRED_FIELDS.keys() if not request_json.get(field)] + missing_fields = [field for field in required_fields.keys() if not request_json.get(field)] if missing_fields: return create_json_response( {}, status=400, reason='Missing required field(s): {missing_field_names}'.format( - missing_field_names=', '.join([REQUIRED_FIELDS[field] for field in missing_fields]) + missing_field_names=', '.join([required_fields[field] for field in missing_fields]) )) - families = Family.objects.filter(guid__in=request_json['familyGuids']).only('guid') - if len(families) != len(request_json['familyGuids']): - return create_json_response( - {}, status=400, reason='The following families do not exist: {missing_families}'.format( - missing_families=', '.join(set(request_json['familyGuids']) - set([family.guid for family in families])) - )) + error = validate_body(request_json) + if error: + return create_json_response({}, status=400, reason=error) if analysis_group_guid: - analysis_group = AnalysisGroup.objects.get(guid=analysis_group_guid, project=project) - update_model_from_json(analysis_group, request_json, user=request.user, allow_unknown_keys=True) + analysis_group = model_cls.objects.get(guid=analysis_group_guid, project=project) + update_model_from_json(analysis_group, request_json, user=request.user) else: - analysis_group, created = get_or_create_model_from_json(AnalysisGroup, { + analysis_group, created = get_or_create_model_from_json(model_cls, { 'project': project, - 'name': request_json['name'], - 'description': request_json.get('description'), 'created_by': request.user, + **request_json, }, update_json=None, user=request.user) if not created: return create_json_response( @@ -45,7 +42,7 @@ def update_analysis_group_handler(request, 
project_guid, analysis_group_guid=Non name=request_json['name'], project=project.name )) - analysis_group.families.set(families) + post_process_model(analysis_group) return create_json_response({ 'analysisGroupsByGuid': { @@ -55,10 +52,46 @@ def update_analysis_group_handler(request, project_guid, analysis_group_guid=Non @login_and_policies_required -def delete_analysis_group_handler(request, project_guid, analysis_group_guid): +def update_analysis_group_handler(request, project_guid, analysis_group_guid=None): + valid_families = set() + + def _validate_families(request_json): + family_guids = request_json.pop('familyGuids') + families = Family.objects.filter(guid__in=family_guids).only('guid') + if len(families) != len(family_guids): + return 'The following families do not exist: {missing_families}'.format( + missing_families=', '.join(set(family_guids) - set([family.guid for family in families]))) + valid_families.update(families) + + return _update_analysis_group( + request, project_guid, analysis_group_guid, AnalysisGroup, REQUIRED_FIELDS, validate_body=_validate_families, + post_process_model=lambda analysis_group: analysis_group.families.set(valid_families), + ) + + +@login_and_policies_required +def update_dynamic_analysis_group_handler(request, project_guid, analysis_group_guid=None): + return _update_analysis_group( + request, project_guid, analysis_group_guid, DynamicAnalysisGroup, + required_fields={f: f.title() for f in ['name', 'criteria']}, + ) + + +@login_and_policies_required +def delete_analysis_group_handler(request, project_guid, analysis_group_guid, model_cls=AnalysisGroup, user_can_delete=True, validate_can_delete=lambda x: None): project = get_project_and_check_permissions(project_guid, request.user, can_edit=True) - AnalysisGroup.objects.get(guid=analysis_group_guid, project=project).delete_model(request.user, user_can_delete=True) + analysis_group = model_cls.objects.get(guid=analysis_group_guid, project=project) + error = 
validate_can_delete(analysis_group) + if error: + raise error + analysis_group.delete_model(request.user, user_can_delete=user_can_delete) return create_json_response({'analysisGroupsByGuid': {analysis_group_guid: None}}) -# TODO add dynamic group endpoints \ No newline at end of file + +@login_and_policies_required +def delete_dynamic_analysis_group_handler(request, project_guid, analysis_group_guid): + return delete_analysis_group_handler( + request, project_guid, analysis_group_guid, model_cls=DynamicAnalysisGroup, user_can_delete=False, + validate_can_delete=lambda analysis_group: None if analysis_group.project_id else 'Cannot delete shared analysis group', + ) \ No newline at end of file From 243134efb598f166a004cb01f92a7d90b944061a Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 11 Apr 2024 12:17:02 -0400 Subject: [PATCH 006/736] submission ui component --- .../panel/variants/Pathogenicity.jsx | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/ui/shared/components/panel/variants/Pathogenicity.jsx b/ui/shared/components/panel/variants/Pathogenicity.jsx index 7aaa4e2405..a14a6b416e 100644 --- a/ui/shared/components/panel/variants/Pathogenicity.jsx +++ b/ui/shared/components/panel/variants/Pathogenicity.jsx @@ -65,6 +65,24 @@ PathogenicityLink.propTypes = { popup: PropTypes.string, } +const SubmissionLabel = React.memo(({ submitter }) => ( + +)) + +SubmissionLabel.propTypes = { + submitter: PropTypes.string.isRequired, +} + +const Submissions = React.memo(({ submissions }) => submissions.map(([submitter, condition]) => ( + } content={condition} /> +))) + +Submissions.propTypes = { + submissions: PropTypes.object.isRequired, +} + const clinvarUrl = (clinvar) => { const baseUrl = 'http://www.ncbi.nlm.nih.gov/clinvar' const variantPath = clinvar.alleleId ? 
`?term=${clinvar.alleleId}[alleleid]` : `/variation/${clinvar.variationId}` @@ -85,8 +103,13 @@ const clinvarLabel = (pathogenicity, assertions, conflictingPathogenicities) => return label } +const clinvarSubmissions = (submitters, conditions) => submitters.map((submitter, index) => ( + [submitter, conditions[index]] +)) + const Pathogenicity = React.memo(({ variant, showHgmd }) => { const clinvar = variant.clinvar || {} + console.log(clinvar) const pathogenicity = [] if ((clinvar.clinicalSignificance || clinvar.pathogenicity) && (clinvar.variationId || clinvar.alleleId)) { const { pathogenicity: clinvarPathogenicity, assertions, severity } = clinvarSignificance(clinvar) @@ -96,6 +119,7 @@ const Pathogenicity = React.memo(({ variant, showHgmd }) => { href: clinvarUrl(clinvar), goldStars: clinvar.goldStars, popup: clinvar.version && `Last Updated: ${new Date(clinvar.version).toLocaleDateString()}`, + submissions: clinvarSubmissions(clinvar.submitters, clinvar.conditions), }]) } if (showHgmd) { @@ -117,6 +141,14 @@ const Pathogenicity = React.memo(({ variant, showHgmd }) => { {`${title}:`} + { + title === 'ClinVar' && ( +
+ + +
+ ) + } )) }) From 107cab0a8764e40d5553d04ce88bd699026b2cf0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 11 Apr 2024 12:51:16 -0400 Subject: [PATCH 007/736] first pass return analysis groups --- seqr/models.py | 2 +- seqr/views/apis/analysis_group_api.py | 6 +++--- seqr/views/utils/orm_to_json_utils.py | 13 ++++++++----- seqr/views/utils/project_context_utils.py | 13 +++++++------ ui/redux/selectors.js | 1 + 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index f6b78b3957..aa89945fd6 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1049,7 +1049,7 @@ def __unicode__(self): return self.name.strip() def _compute_guid(self): - return 'AG%07d_%s' % (self.id, _slugify(str(self))) + return 'DAG%07d_%s' % (self.id, _slugify(str(self))) class Meta: unique_together = ('project', 'name') diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index 79bea55563..bdd190fb82 100644 --- a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -11,7 +11,7 @@ REQUIRED_FIELDS = {'name': 'Name', 'familyGuids': 'Families'} -def _update_analysis_group(request, project_guid, analysis_group_guid, model_cls, required_fields, +def _update_analysis_group(request, project_guid, analysis_group_guid, model_cls, required_fields, is_dynamic=False, validate_body=lambda x: None, post_process_model=lambda x: None): project = get_project_and_check_permissions(project_guid, request.user, can_edit=True) @@ -46,7 +46,7 @@ def _update_analysis_group(request, project_guid, analysis_group_guid, model_cls return create_json_response({ 'analysisGroupsByGuid': { - analysis_group.guid: get_json_for_analysis_group(analysis_group, project_guid=project_guid) + analysis_group.guid: get_json_for_analysis_group(analysis_group, project_guid=project_guid, is_dynamic=is_dynamic) }, }) @@ -72,7 +72,7 @@ def _validate_families(request_json): @login_and_policies_required def 
update_dynamic_analysis_group_handler(request, project_guid, analysis_group_guid=None): return _update_analysis_group( - request, project_guid, analysis_group_guid, DynamicAnalysisGroup, + request, project_guid, analysis_group_guid, DynamicAnalysisGroup, is_dynamic=True, required_fields={f: f.title() for f in ['name', 'criteria']}, ) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 4996114462..989f5aec41 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -364,7 +364,7 @@ def get_json_for_sample(sample, **kwargs): return _get_json_for_model(sample, **_get_sample_json_kwargs(**kwargs)) -def get_json_for_analysis_groups(analysis_groups, project_guid=None, skip_nested=False, **kwargs): +def get_json_for_analysis_groups(analysis_groups, project_guid=None, skip_nested=False, is_dynamic=False, **kwargs): """Returns a JSON representation of the given list of AnalysisGroups. Args: @@ -373,20 +373,23 @@ def get_json_for_analysis_groups(analysis_groups, project_guid=None, skip_nested Returns: array: array of json objects """ - + # TODO familyGuids needed for dynamic groups? 
Either populate or remove def _process_result(result, group): result.update({ - 'familyGuids': [f.guid for f in group.families.all()] + 'familyGuids': [] if is_dynamic else [f.guid for f in group.families.all()], }) - prefetch_related_objects(analysis_groups, 'families') + if not is_dynamic: + prefetch_related_objects(analysis_groups, 'families') if project_guid or not skip_nested: additional_kwargs = {'nested_fields': [{'fields': ('project', 'guid'), 'value': project_guid}]} else: additional_kwargs = {'additional_model_fields': ['project_id']} - return _get_json_for_models(analysis_groups, process_result=_process_result, **additional_kwargs, **kwargs) + return _get_json_for_models( + analysis_groups, process_result=_process_result, guid_key='analysisGroupGuid', **additional_kwargs, **kwargs, + ) def get_json_for_analysis_group(analysis_group, **kwargs): diff --git a/seqr/views/utils/project_context_utils.py b/seqr/views/utils/project_context_utils.py index 96b65a9e08..e8fdea5c45 100644 --- a/seqr/views/utils/project_context_utils.py +++ b/seqr/views/utils/project_context_utils.py @@ -1,7 +1,7 @@ from collections import defaultdict from django.db.models import Count, Q, prefetch_related_objects -from seqr.models import Individual, IgvSample, AnalysisGroup, LocusList, VariantTagType,\ +from seqr.models import Individual, IgvSample, AnalysisGroup, DynamicAnalysisGroup, LocusList, VariantTagType,\ VariantFunctionalData, FamilyNote, SavedVariant, VariantTag, VariantNote from seqr.utils.gene_utils import get_genes from seqr.views.utils.orm_to_json_utils import _get_json_for_families, _get_json_for_individuals, _get_json_for_models, \ @@ -26,7 +26,7 @@ def get_projects_child_entities(projects, project_guid, user): else: project_id_to_guid = {project.id: project.guid for project in projects} for group in response['analysisGroupsByGuid'].values(): - group['projectGuid'] = project_id_to_guid[group.pop('projectId')] + group['projectGuid'] = 
project_id_to_guid.get(group.pop('projectId')) for project in response['projectsByGuid'].values(): project['locusListGuids'] = [] @@ -41,11 +41,12 @@ def get_projects_child_entities(projects, project_guid, user): def get_project_analysis_groups(projects, project_guid): - # TODO also needs to include dynamic groups, must include familyGuids analysis_group_models = AnalysisGroup.objects.filter(project__in=projects) - analysis_groups = get_json_for_analysis_groups( - analysis_group_models, project_guid=project_guid, skip_nested=True, is_analyst=False) - return {ag['analysisGroupGuid']: ag for ag in analysis_groups} + get_json_kwargs = dict(project_guid=project_guid, skip_nested=True, is_analyst=False) + analysis_groups = get_json_for_analysis_groups(analysis_group_models, **get_json_kwargs) + dynamic_analysis_group_models = DynamicAnalysisGroup.objects.filter(Q(project__in=projects) | Q(project__isnull=True)) + dynamic_analysis_groups = get_json_for_analysis_groups(dynamic_analysis_group_models, **get_json_kwargs, is_dynamic=True) + return {ag['analysisGroupGuid']: ag for ag in analysis_groups + dynamic_analysis_groups} def get_project_locus_lists(projects, user, include_metadata=False): diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index d1e82bc453..f4f46f4832 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -89,6 +89,7 @@ export const getNotesByFamilyType = createSelector( export const getProjectAnalysisGroupOptions = createSelector( getAnalysisGroupsGroupedByProjectGuid, + // TODO cannot include dynamic groups analysisGroupsByProject => Object.entries(analysisGroupsByProject).reduce( (acc, [projectGuid, analysisGroupsByGuid]) => ({ ...acc, From b52f2f809da19e786e51cb80b53da8869a731b0a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 11 Apr 2024 16:24:23 -0400 Subject: [PATCH 008/736] show dynamic list --- seqr/views/apis/analysis_group_api.py | 6 +++--- .../components/AnalysisGroupButtons.jsx | 18 +++++++++------- 
.../Project/components/AnalysisGroups.jsx | 21 +++++++++++++++---- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index bdd190fb82..2d797eff17 100644 --- a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -78,13 +78,13 @@ def update_dynamic_analysis_group_handler(request, project_guid, analysis_group_ @login_and_policies_required -def delete_analysis_group_handler(request, project_guid, analysis_group_guid, model_cls=AnalysisGroup, user_can_delete=True, validate_can_delete=lambda x: None): +def delete_analysis_group_handler(request, project_guid, analysis_group_guid, model_cls=AnalysisGroup, validate_can_delete=lambda x: None): project = get_project_and_check_permissions(project_guid, request.user, can_edit=True) analysis_group = model_cls.objects.get(guid=analysis_group_guid, project=project) error = validate_can_delete(analysis_group) if error: raise error - analysis_group.delete_model(request.user, user_can_delete=user_can_delete) + analysis_group.delete_model(request.user, user_can_delete=True) return create_json_response({'analysisGroupsByGuid': {analysis_group_guid: None}}) @@ -92,6 +92,6 @@ def delete_analysis_group_handler(request, project_guid, analysis_group_guid, mo @login_and_policies_required def delete_dynamic_analysis_group_handler(request, project_guid, analysis_group_guid): return delete_analysis_group_handler( - request, project_guid, analysis_group_guid, model_cls=DynamicAnalysisGroup, user_can_delete=False, + request, project_guid, analysis_group_guid, model_cls=DynamicAnalysisGroup, validate_can_delete=lambda analysis_group: None if analysis_group.project_id else 'Cannot delete shared analysis group', ) \ No newline at end of file diff --git a/ui/pages/Project/components/AnalysisGroupButtons.jsx b/ui/pages/Project/components/AnalysisGroupButtons.jsx index 4a531bafaf..4664ee7024 100644 --- 
a/ui/pages/Project/components/AnalysisGroupButtons.jsx +++ b/ui/pages/Project/components/AnalysisGroupButtons.jsx @@ -81,13 +81,11 @@ const mapTableInputStateToProps = state => ({ data: Object.values(getProjectFamiliesByGuid(state)), }) -const BASE_FORM_FIELDS = [ - { name: 'name', label: 'Name', validate: value => (value ? undefined : 'Name is required') }, - { name: 'description', label: 'Description' }, -] +const NAME_FIELD = { name: 'name', label: 'Name', validate: value => (value ? undefined : 'Name is required') } const FORM_FIELDS = [ - ...BASE_FORM_FIELDS, + NAME_FIELD, + { name: 'description', label: 'Description' }, { name: UPLOADED_FAMILIES_FIELD, key: 'familyUpload', @@ -107,7 +105,7 @@ const FORM_FIELDS = [ ] const DYNAMIC_FORM_FIELDS = [ - ...BASE_FORM_FIELDS, + NAME_FIELD, ...[FAMILY_FIELD_ANALYSIS_STATUS, FAMILY_FIELD_ANALYSED_BY, FAMILY_FIELD_FIRST_SAMPLE].map((category, i) => ({ name: `criteria.${category}`, label: `Criteria: ${FAMILY_FIELD_NAME_LOOKUP[category]}`, @@ -130,8 +128,12 @@ const DECORATORS = [ }), ] +const canUpdateGroup = (project, analysisGroup) => ( + project.canEdit && (!analysisGroup?.analysisGroupGuid || analysisGroup.projectGuid) +) + export const UpdateAnalysisGroup = React.memo(({ project, analysisGroup, onSubmit, iconOnly, isDynamic }) => { - if (!project.canEdit) { + if (!canUpdateGroup(project, analysisGroup)) { return null } const title = `${analysisGroup ? 'Edit' : 'Create New'} ${isDynamic ? 'Dynamic ' : ''}Analysis Group` @@ -175,7 +177,7 @@ export const UpdateAnalysisGroupButton = connect(mapUpdateStateToProps, mapDispa const navigateProjectPage = (history, projectGuid) => () => history.push(`/project/${projectGuid}/project_page`) export const DeleteAnalysisGroup = React.memo(({ project, analysisGroup, onSubmit, size, iconOnly, history }) => ( - project.canEdit ? ( + canUpdateGroup(project, analysisGroup) ? ( {Object.values(analysisGroupsByGuid).sort(compareObjects('name')).map(ag => (
+ {ag.criteria && } {ag.name} } - content={ + content={ag.criteria ? Object.keys(ag.criteria).map(category => ( + + )) : (
{`${ag.familyGuids.length} Families`}
{ag.description}
- } + )} size="tiny" /> - +
))} From d0c33548a5a70f8e827d3328eb0f93437fca604d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 11 Apr 2024 16:40:30 -0400 Subject: [PATCH 009/736] show no project groups --- seqr/views/utils/orm_to_json_utils.py | 2 +- ui/pages/Project/selectors.js | 7 ++++++- .../Search/components/filters/ProjectFamiliesFilter.jsx | 1 + ui/pages/Search/selectors.js | 7 ++++--- ui/redux/selectors.js | 1 - 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 989f5aec41..1f6d1a14b8 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -383,7 +383,7 @@ def _process_result(result, group): prefetch_related_objects(analysis_groups, 'families') if project_guid or not skip_nested: - additional_kwargs = {'nested_fields': [{'fields': ('project', 'guid'), 'value': project_guid}]} + additional_kwargs = {'nested_fields': [{'fields': ('project', 'guid'), 'value': None if is_dynamic else project_guid}]} else: additional_kwargs = {'additional_model_fields': ['project_id']} diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index f0a23afb14..e91f9a05ba 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -77,7 +77,12 @@ export const getProjectFamiliesByGuid = createSelector( getFamiliesGroupedByProjectGuid, getProjectGuid, selectEntitiesForProjectGuid, ) export const getProjectAnalysisGroupsByGuid = createSelector( - getAnalysisGroupsGroupedByProjectGuid, getProjectGuid, selectEntitiesForProjectGuid, + getAnalysisGroupsGroupedByProjectGuid, + getProjectGuid, + (groupedAnalysisGroups, projectGuid) => ({ + ...selectEntitiesForProjectGuid(groupedAnalysisGroups, projectGuid), + ...selectEntitiesForProjectGuid(groupedAnalysisGroups, null), + }), ) const getAnalysisGroupGuid = (state, props) => ( diff --git a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx 
b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx index 1d9f36bc0c..736e2e44e9 100644 --- a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx +++ b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx @@ -57,6 +57,7 @@ class ProjectFamiliesFilterInput extends React.PureComponent { } selectedAnalysisGroups = () => { + // TODO correctly show selected dynamic groups, depndingon how familyGuids is populated const { projectAnalysisGroupsByGuid, value } = this.props return this.multiFamiliesSelected() ? [] : diff --git a/ui/pages/Search/selectors.js b/ui/pages/Search/selectors.js index b4f383c50f..1621666173 100644 --- a/ui/pages/Search/selectors.js +++ b/ui/pages/Search/selectors.js @@ -181,7 +181,8 @@ export const getFamilyOptions = createSelector( export const getAnalysisGroupOptions = createSelector( getAnalysisGroupsGroupedByProjectGuid, (state, props) => props.value.projectGuid, - (analysisGroupsGroupedByProjectGuid, projectGuid) => Object.values( - analysisGroupsGroupedByProjectGuid[projectGuid] || {}, - ).map(group => ({ value: group.analysisGroupGuid, text: group.name })), + (analysisGroupsGroupedByProjectGuid, projectGuid) => Object.values({ + ...(analysisGroupsGroupedByProjectGuid[projectGuid] || {}), + ...(analysisGroupsGroupedByProjectGuid.null || {}), + }).map(group => ({ value: group.analysisGroupGuid, text: group.name })), ) diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index f4f46f4832..d1e82bc453 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -89,7 +89,6 @@ export const getNotesByFamilyType = createSelector( export const getProjectAnalysisGroupOptions = createSelector( getAnalysisGroupsGroupedByProjectGuid, - // TODO cannot include dynamic groups analysisGroupsByProject => Object.entries(analysisGroupsByProject).reduce( (acc, [projectGuid, analysisGroupsByGuid]) => ({ ...acc, From 88acdde55a311b112579b986ed04463b3f46048f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 12 Apr 2024 
13:47:02 -0400 Subject: [PATCH 010/736] restructure and flag ares where need to poulate dynamic families --- .../components/AnalysisGroupButtons.jsx | 5 ++-- .../Project/components/AnalysisGroups.jsx | 2 +- ui/pages/Project/components/ProjectPageUI.jsx | 2 +- ui/pages/Project/components/SavedVariants.jsx | 12 +++++----- ui/pages/Project/selectors.js | 24 +++++++++---------- .../filters/ProjectFamiliesFilter.jsx | 2 +- ui/pages/Search/constants.js | 1 + ui/pages/Search/selectors.js | 1 + ui/redux/selectors.js | 2 +- .../components/panel/variants/selectors.js | 1 + 10 files changed, 28 insertions(+), 24 deletions(-) diff --git a/ui/pages/Project/components/AnalysisGroupButtons.jsx b/ui/pages/Project/components/AnalysisGroupButtons.jsx index 4664ee7024..099e490d5f 100644 --- a/ui/pages/Project/components/AnalysisGroupButtons.jsx +++ b/ui/pages/Project/components/AnalysisGroupButtons.jsx @@ -132,10 +132,11 @@ const canUpdateGroup = (project, analysisGroup) => ( project.canEdit && (!analysisGroup?.analysisGroupGuid || analysisGroup.projectGuid) ) -export const UpdateAnalysisGroup = React.memo(({ project, analysisGroup, onSubmit, iconOnly, isDynamic }) => { +export const UpdateAnalysisGroup = React.memo(({ project, analysisGroup, onSubmit, iconOnly, createDynamic }) => { if (!canUpdateGroup(project, analysisGroup)) { return null } + const isDynamic = !!analysisGroup?.criteria || createDynamic const title = `${analysisGroup ? 'Edit' : 'Create New'} ${isDynamic ? 'Dynamic ' : ''}Analysis Group` const entityName = `${isDynamic ? 
'Dynamic' : ''}AnalysisGroup` return ( @@ -160,7 +161,7 @@ UpdateAnalysisGroup.propTypes = { project: PropTypes.object, analysisGroup: PropTypes.object, iconOnly: PropTypes.bool, - isDynamic: PropTypes.bool, + createDynamic: PropTypes.bool, onSubmit: PropTypes.func, } diff --git a/ui/pages/Project/components/AnalysisGroups.jsx b/ui/pages/Project/components/AnalysisGroups.jsx index 3c7813d853..17b0b9d721 100644 --- a/ui/pages/Project/components/AnalysisGroups.jsx +++ b/ui/pages/Project/components/AnalysisGroups.jsx @@ -42,7 +42,7 @@ const AnalysisGroups = React.memo(({ projectGuid, load, loading, analysisGroupsB )} size="tiny" /> - + ))} diff --git a/ui/pages/Project/components/ProjectPageUI.jsx b/ui/pages/Project/components/ProjectPageUI.jsx index c594739a32..593baa1557 100644 --- a/ui/pages/Project/components/ProjectPageUI.jsx +++ b/ui/pages/Project/components/ProjectPageUI.jsx @@ -28,7 +28,7 @@ import VariantTags from './VariantTags' const CreateAnalysisGroupButtons = () => ([ , - , + , ]) const ProjectSectionComponent = React.memo(( diff --git a/ui/pages/Project/components/SavedVariants.jsx b/ui/pages/Project/components/SavedVariants.jsx index 2f09953bc8..416f462164 100644 --- a/ui/pages/Project/components/SavedVariants.jsx +++ b/ui/pages/Project/components/SavedVariants.jsx @@ -6,7 +6,6 @@ import { Grid } from 'semantic-ui-react' import styled from 'styled-components' import { updateVariantTags } from 'redux/rootReducer' -import { getAnalysisGroupsByGuid } from 'redux/selectors' import { VARIANT_SORT_FIELD, VARIANT_HIDE_EXCLUDED_FIELD, @@ -27,6 +26,7 @@ import { loadSavedVariants, updateSavedVariantTable } from '../reducers' import { getCurrentProject, getProjectTagTypeOptions, getTaggedVariantsByFamily, getProjectVariantSavedByOptions, getSavedVariantTagTypeCounts, getSavedVariantTagTypeCountsByFamily, getSavedVariantTableState, + getCurrentAnalysisGroupFamilyGuids, } from '../selectors' import VariantTagTypeBar, { getSavedVariantsLinkPath } from 
'./VariantTagTypeBar' import SelectSavedVariantsTable, { TAG_COLUMN, VARIANT_POS_COLUMN, GENES_COLUMN } from './SelectSavedVariantsTable' @@ -130,7 +130,7 @@ class BaseProjectSavedVariants extends React.PureComponent { static propTypes = { match: PropTypes.object, project: PropTypes.object, - analysisGroup: PropTypes.object, + analysisGroupFamilyGuids: PropTypes.arrayOf(PropTypes.string), tagTypeCounts: PropTypes.object, updateTableField: PropTypes.func, loadProjectSavedVariants: PropTypes.func, @@ -154,7 +154,7 @@ class BaseProjectSavedVariants extends React.PureComponent { } loadVariants = (newParams) => { - const { analysisGroup, match, loadProjectSavedVariants, updateTableField } = this.props + const { analysisGroupFamilyGuids, match, loadProjectSavedVariants, updateTableField } = this.props const { familyGuid, variantGuid, analysisGroupGuid } = match.params const isInitialLoad = match.params === newParams @@ -162,7 +162,7 @@ class BaseProjectSavedVariants extends React.PureComponent { newParams.analysisGroupGuid !== analysisGroupGuid || newParams.variantGuid !== variantGuid - const familyGuids = newParams.familyGuid ? [newParams.familyGuid] : (analysisGroup || {}).familyGuids + const familyGuids = newParams.familyGuid ? [newParams.familyGuid] : analysisGroupFamilyGuids updateTableField('page')(1) if (isInitialLoad || hasUpdatedFamilies) { @@ -235,7 +235,7 @@ class BaseProjectSavedVariants extends React.PureComponent { } render() { - const { project, analysisGroup, loadProjectSavedVariants, categoryFilter, ...props } = this.props + const { project, analysisGroupFamilyGuids, loadProjectSavedVariants, categoryFilter, ...props } = this.props const { familyGuid, tag, variantGuid } = props.match.params const appliedTagCategoryFilter = tag || (variantGuid ? 
null : (categoryFilter || SHOW_ALL)) @@ -260,7 +260,7 @@ class BaseProjectSavedVariants extends React.PureComponent { const mapStateToProps = (state, ownProps) => ({ project: getCurrentProject(state), - analysisGroup: getAnalysisGroupsByGuid(state)[ownProps.match.params.analysisGroupGuid], + analysisGroupFamilyGuids: getCurrentAnalysisGroupFamilyGuids(state, ownProps), tagTypeCounts: ownProps.match.params.familyGuid ? getSavedVariantTagTypeCountsByFamily(state)[ownProps.match.params.familyGuid] : getSavedVariantTagTypeCounts(state, ownProps), diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index e91f9a05ba..cd4eb05ca4 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -89,21 +89,21 @@ const getAnalysisGroupGuid = (state, props) => ( (props || {}).match ? props.match.params.analysisGroupGuid : (props || {}).analysisGroupGuid ) -const getCurrentAnalysisGroup = createSelector( +export const getCurrentAnalysisGroupFamilyGuids = createSelector( getProjectAnalysisGroupsByGuid, getAnalysisGroupGuid, (projectAnalysisGroupsByGuid, analysisGroupGuid) => analysisGroupGuid && - projectAnalysisGroupsByGuid[analysisGroupGuid], + projectAnalysisGroupsByGuid[analysisGroupGuid]?.familyGuids, // TODO work with dynamic group ) export const getProjectAnalysisGroupFamiliesByGuid = createSelector( getProjectFamiliesByGuid, - getCurrentAnalysisGroup, - (projectFamiliesByGuid, analysisGroup) => { - if (!analysisGroup) { + getCurrentAnalysisGroupFamilyGuids, + (projectFamiliesByGuid, analysisGroupFamilyGuids) => { + if (!analysisGroupFamilyGuids) { return projectFamiliesByGuid } - return analysisGroup.familyGuids.reduce( + return analysisGroupFamilyGuids.reduce( (acc, familyGuid) => ({ ...acc, [familyGuid]: projectFamiliesByGuid[familyGuid] }), {}, ) }, @@ -148,12 +148,12 @@ export const getProjectAnalysisGroupIndividualsByGuid = createSelector( export const getProjectAnalysisGroupSamplesByTypes = createSelector( getCurrentProject, 
- getCurrentAnalysisGroup, - (project, analysisGroup) => Object.entries(project.sampleCounts || {}).map( + getCurrentAnalysisGroupFamilyGuids, + (project, analysisGroupFamilyGuids) => Object.entries(project.sampleCounts || {}).map( ([key, typeCounts]) => ([key, typeCounts.map(({ familyCounts, ...data }) => ({ ...data, count: Object.entries(familyCounts).reduce((total, [familyGuid, count]) => ( - (!analysisGroup || analysisGroup.familyGuids.includes(familyGuid)) ? total + count : total + (!analysisGroupFamilyGuids || analysisGroupFamilyGuids.includes(familyGuid)) ? total + count : total ), 0), })).filter(({ count }) => count > 0)]), ), @@ -252,9 +252,9 @@ export const getSavedVariantTagTypeCounts = createSelector( ) export const getAnalysisGroupTagTypeCounts = createSelector( - getCurrentAnalysisGroup, + getCurrentAnalysisGroupFamilyGuids, getFamilyTagTypeCounts, - (analysisGroup, familyTagTypeCounts) => (analysisGroup ? analysisGroup.familyGuids.reduce( + (analysisGroupFamilyGuids, familyTagTypeCounts) => (analysisGroupFamilyGuids ? analysisGroupFamilyGuids.reduce( (acc, familyGuid) => Object.entries(familyTagTypeCounts[familyGuid] || {}).reduce((acc2, [tagType, count]) => ( { ...acc2, [tagType]: count + (acc2[tagType] || 0) } ), acc), {}, @@ -407,7 +407,7 @@ const analysedByFilters = (filter, analysedByOptions) => { let requireNoAnalysedBy = false const analsedByGroups = Object.values(filter.reduce( (acc, val) => { - const optFilter = analysedByOptions.has(val) ? () => ({ createdBy }) => createdBy === val : + const optFilter = analysedByOptions?.has(val) ? 
() => ({ createdBy }) => createdBy === val : ANALYSED_BY_FILTER_LOOKUP[val] if (optFilter) { const category = ANALYSED_BY_CATEGORY_OPTION_LOOKUP[val] diff --git a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx index 736e2e44e9..7e099fa8b7 100644 --- a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx +++ b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx @@ -57,7 +57,7 @@ class ProjectFamiliesFilterInput extends React.PureComponent { } selectedAnalysisGroups = () => { - // TODO correctly show selected dynamic groups, depndingon how familyGuids is populated + // TODO correctly show selected dynamic groups, depending on how familyGuids is populated const { projectAnalysisGroupsByGuid, value } = this.props return this.multiFamiliesSelected() ? [] : diff --git a/ui/pages/Search/constants.js b/ui/pages/Search/constants.js index d9627ce86e..a17a951594 100644 --- a/ui/pages/Search/constants.js +++ b/ui/pages/Search/constants.js @@ -3,6 +3,7 @@ import { DE_NOVO_FILTER, ANY_AFFECTED, INHERITANCE_FILTER_OPTIONS, } from 'shared/utils/constants' +// TODO work with dynamic groups? export const getSelectedAnalysisGroups = (analysisGroupsByGuid, familyGuids) => Object.values( analysisGroupsByGuid, ).filter(group => group.familyGuids.every(familyGuid => familyGuids.includes(familyGuid))) diff --git a/ui/pages/Search/selectors.js b/ui/pages/Search/selectors.js index 1621666173..a90029b7ff 100644 --- a/ui/pages/Search/selectors.js +++ b/ui/pages/Search/selectors.js @@ -40,6 +40,7 @@ export const getProjectFamilies = (params, familiesByGuid, familiesByProjectGuid } if (params.analysisGroupGuid) { const analysisGroup = analysisGroupByGuid[params.analysisGroupGuid] + // TODO work with dynamic groups return analysisGroup ? 
{ projectGuid: analysisGroup.projectGuid, familyGuids: analysisGroup.familyGuids, diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index d1e82bc453..9d3de32329 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -100,7 +100,7 @@ export const getProjectAnalysisGroupOptions = createSelector( export const getAnalysisGroupsByFamily = createSelector( getAnalysisGroupsByGuid, analysisGroupsByGuid => Object.values(analysisGroupsByGuid).reduce( - (acc, analysisGroup) => analysisGroup.familyGuids.reduce( + (acc, analysisGroup) => (analysisGroup.familyGuids || []).reduce( (familyAcc, familyGuid) => ({ ...familyAcc, [familyGuid]: [...(familyAcc[familyGuid] || []), analysisGroup] }), acc, ), {}, diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index cd03204b7c..1ab981dd4e 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -84,6 +84,7 @@ const getProjectSavedVariantsSelection = createSelector( if (variantGuid) { variantFilter = o => variantGuid.split(',').includes(o.variantGuid) } else if (analysisGroupGuid && analysisGroupsByGuid[analysisGroupGuid]) { + // TODO work with dynamic groups const analysisGroupFamilyGuids = analysisGroupsByGuid[analysisGroupGuid].familyGuids variantFilter = o => o.familyGuids.some(fg => analysisGroupFamilyGuids.includes(fg)) } else if (familyGuid) { From 786ece28fa543f64964de6665b4a236f6900f9ab Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 12 Apr 2024 15:53:09 -0400 Subject: [PATCH 011/736] abstract out selector for family filter --- ui/pages/Project/constants.js | 29 +++++------ ui/pages/Project/selectors.js | 90 +++++++++++++++++------------------ 2 files changed, 57 insertions(+), 62 deletions(-) diff --git a/ui/pages/Project/constants.js b/ui/pages/Project/constants.js index 1c752dcdfb..bbb8c063cc 100644 --- a/ui/pages/Project/constants.js +++ b/ui/pages/Project/constants.js @@ -260,7 
+260,7 @@ const getFamilyCaseReviewStatuses = (family, individualsByGuid) => { return statuses.length ? statuses : family.caseReviewStatuses } -const caseReviewStatusFilter = status => individualsByGuid => family => getFamilyCaseReviewStatuses( +const caseReviewStatusFilter = status => (family, individualsByGuid) => getFamilyCaseReviewStatuses( family, individualsByGuid, ).some(caseReviewStatus => caseReviewStatus === status) @@ -288,7 +288,7 @@ const ALL_FAMILIES_FILTER = { value: SHOW_ALL, name: 'All', createFilter: () => const IN_REVIEW_FAMILIES_FILTER = { value: SHOW_IN_REVIEW, name: 'In Review', - createFilter: individualsByGuid => family => familyIsInReview(family, individualsByGuid), + createFilter: familyIsInReview, } const ACCEPTED_FILTER = { value: SHOW_ACCEPTED, @@ -298,18 +298,18 @@ const ACCEPTED_FILTER = { const ASSIGNED_TO_ME_FILTER = { value: SHOW_ASSIGNED_TO_ME, name: 'Assigned To Me', - createFilter: (individualsByGuid, user) => family => familyIsAssignedToMe(family, user), + createFilter: (family, individualsByGuid, user) => familyIsAssignedToMe(family, user), } const ANALYST_HIGH_PRIORITY_TAG = 'Analyst high priority' -const hasMatchingSampleFilter = isMatchingSample => (individualsByGuid, user, samplesByFamily) => family => ( +const hasMatchingSampleFilter = isMatchingSample => (family, individualsByGuid, user, samplesByFamily) => ( (samplesByFamily[family.familyGuid] || []).some(sample => sample.isActive && isMatchingSample(sample))) export const CATEGORY_FAMILY_FILTERS = { [FAMILY_FIELD_ANALYSIS_STATUS]: [ ...SELECTABLE_FAMILY_ANALYSIS_STATUS_OPTIONS.map(option => ({ ...option, - createFilter: () => family => family.analysisStatus === option.value, + createFilter: family => family.analysisStatus === option.value, })), ...[ACCEPTED_FILTER, IN_REVIEW_FAMILIES_FILTER].map(filter => ({ ...filter, category: 'Case Review Status' })), ], @@ -318,31 +318,31 @@ export const CATEGORY_FAMILY_FILTERS = { { value: SHOW_ANALYSED_BY_ME, name: 'Analysed By 
Me', - analysedByFilter: (individualsByGuid, user) => ({ createdBy }) => createdBy === (user.displayName || user.email), + analysedByFilter: ({ createdBy }, user) => createdBy === (user.displayName || user.email), }, { value: SHOW_ANALYSED, name: 'Analysed', - analysedByFilter: () => () => true, + analysedByFilter: () => true, }, { value: SHOW_NOT_ANALYSED, name: 'Not Analysed', requireNoAnalysedBy: true, - analysedByFilter: () => () => true, + analysedByFilter: () => true, }, ...FAMILY_ANALYSED_BY_DATA_TYPES.map(([type, typeDisplay]) => ({ value: type, name: typeDisplay, category: 'Data Type', - analysedByFilter: () => ({ dataType }) => dataType === type, + analysedByFilter: ({ dataType }) => dataType === type, })), { value: 'yearSinceAnalysed', name: '>1 Year', category: 'Analysis Date', requireNoAnalysedBy: true, - analysedByFilter: () => ({ lastModifiedDate }) => ( + analysedByFilter: ({ lastModifiedDate }) => ( (new Date()).setFullYear(new Date().getFullYear() - 1) < new Date(lastModifiedDate) ), }, @@ -368,20 +368,17 @@ export const CATEGORY_FAMILY_FILTERS = { { value: SHOW_PHENOTYPES_ENTERED, name: 'Required Metadata Entered', - createFilter: individualsByGuid => family => familyHasRequiredMetadata(family, individualsByGuid), + createFilter: familyHasRequiredMetadata, }, { value: SHOW_NO_PHENOTYPES_ENTERED, name: 'Required Metadata Missing', - createFilter: individualsByGuid => family => !familyHasRequiredMetadata(family, individualsByGuid), + createFilter: (family, individualsByGuid) => !familyHasRequiredMetadata(family, individualsByGuid), }, ], [FAMILY_FIELD_SAVED_VARIANTS]: [MME_TAG_NAME, ANALYST_HIGH_PRIORITY_TAG].map(tagName => ({ value: tagName, name: tagName, - createFilter: (individualsByGuid, user, samplesByFamily, familyTagTypeCounts) => ({ familyGuid }) => ( - (familyTagTypeCounts[familyGuid] || {})[tagName] - ), })), } @@ -399,7 +396,7 @@ export const CASE_REVIEW_FAMILY_FILTER_OPTIONS = [ { value: SHOW_ASSIGNED_TO_ME_IN_REVIEW, name: 'Assigned 
To Me - In Review', - createFilter: (individualsByGuid, user) => family => familyIsAssignedToMe( + createFilter: (family, individualsByGuid, user) => familyIsAssignedToMe( family, user, ) && familyIsInReview(family, individualsByGuid), }, diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index f0a23afb14..f7274a0534 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -391,18 +391,11 @@ const ANALYSED_BY_CATEGORY_OPTION_LOOKUP = CATEGORY_FAMILY_FILTERS[FAMILY_FIELD_ (acc, { value, category }) => ({ ...acc, [value]: category || 'Analysed By' }), {}, ) -const analysedByFilters = (filter, analysedByOptions) => { - const filterGroups = [] - - const otherFilters = filter.map(val => FAMILY_FILTER_LOOKUP[val]).filter(val => val) - if (otherFilters.length) { - filterGroups.push(otherFilters) - } - +const isAnalysedBy = (family, analysedByFilter, user, analysedByOptions) => { let requireNoAnalysedBy = false - const analsedByGroups = Object.values(filter.reduce( + const analsedByGroups = Object.values(analysedByFilter.reduce( (acc, val) => { - const optFilter = analysedByOptions.has(val) ? () => ({ createdBy }) => createdBy === val : + const optFilter = analysedByOptions?.has(val) ? ({ createdBy }) => createdBy === val : ANALYSED_BY_FILTER_LOOKUP[val] if (optFilter) { const category = ANALYSED_BY_CATEGORY_OPTION_LOOKUP[val] @@ -417,16 +410,14 @@ const analysedByFilters = (filter, analysedByOptions) => { return acc }, {}, )) - if (analsedByGroups.length) { - filterGroups.push([(...args) => (family) => { - const filteredAnalysedBy = analsedByGroups.reduce( - (acc, filterGroup) => acc.filter(analysedBy => filterGroup.some(f => f(...args)(analysedBy))), - family.analysedBy, - ) - return requireNoAnalysedBy ? 
filteredAnalysedBy.length === 0 : filteredAnalysedBy.length > 0 - }]) + if (!analsedByGroups.length) { + return true } - return filterGroups + const filteredAnalysedBy = analsedByGroups.reduce( + (acc, filterGroup) => acc.filter(analysedBy => filterGroup.some(f => f(analysedBy, user))), + family.analysedBy, + ) + return requireNoAnalysedBy ? filteredAnalysedBy.length === 0 : filteredAnalysedBy.length > 0 } export const getFamiliesTableFilters = createSelector( @@ -435,52 +426,59 @@ export const getFamiliesTableFilters = createSelector( (familyTableFiltersByProject, projectGuid) => (familyTableFiltersByProject || {})[projectGuid], ) -const getFamiliesFilterFunc = createSelector( +const familyPassesFilters = createSelector( + getIndividualsByGuid, + getUser, + getSamplesByFamily, + (individualsByGuid, user, samplesByFamily) => (family, groupedFilters, analysedByOptions) => { + if (groupedFilters.analysedBy && !isAnalysedBy(family, groupedFilters.analysedBy, user, analysedByOptions)) { + return false + } + return Object.values(groupedFilters).every((groupVals) => { + const filters = (groupVals || []).map(val => FAMILY_FILTER_LOOKUP[val]).filter(val => val) + return !filters.length || filters.some(filter => filter(family, individualsByGuid, user, samplesByFamily)) + }) + }, +) + +const familyPassesTableFilters = createSelector( (state, ownProps) => ownProps?.tableName === CASE_REVIEW_TABLE_NAME, state => state.caseReviewTableState.familiesFilter, getFamiliesTableFilters, getFamilyAnalysers, - (isCaseReview, caseReviewFilter, familyTableFilters, analysedByOptions) => { + getIndividualsByGuid, + getUser, + getFamilyTagTypeCounts, + familyPassesFilters, + ( + isCaseReview, caseReviewFilter, familyTableFilters, analysedByOptions, individualsByGuid, user, + familyTagTypeCounts, passesFilterFunc, + ) => (family) => { if (isCaseReview) { - return CASE_REVIEW_FILTER_LOOKUP[caseReviewFilter] + return CASE_REVIEW_FILTER_LOOKUP[caseReviewFilter](family, individualsByGuid, user) } 
- const { analysedBy, ...tableFilters } = familyTableFilters || {} - const filterGroups = Object.values(tableFilters).map( - groupVals => (groupVals || []).map(val => FAMILY_FILTER_LOOKUP[val]).filter(val => val), - ).filter(groupVals => groupVals.length) - if (analysedBy) { - const filters = analysedByFilters(analysedBy, analysedByOptions) - if (filters.length) { - filterGroups.push(...filters) - } - } - if (!filterGroups.length) { - return null + const { savedVariants, ...tableFilters } = familyTableFilters || {} + if (savedVariants?.length && !savedVariants.some( + tagName => (familyTagTypeCounts[family.familyGuid] || {})[tagName], + )) { + return false } - - return (...args) => family => filterGroups.every(filters => filters.some(filter => filter(...args)(family))) + return passesFilterFunc(family, tableFilters, analysedByOptions) }, ) export const getVisibleFamilies = createSelector( getProjectAnalysisGroupFamiliesByGuid, getFamiliesBySearchString, - getIndividualsByGuid, - getSamplesByFamily, - getUser, - getFamilyTagTypeCounts, getFamiliesSearch, - getFamiliesFilterFunc, - ( - familiesByGuid, familiesBySearchString, individualsByGuid, samplesByFamily, user, familyTagTypeCounts, - familiesSearch, familyFilter, - ) => { + familyPassesTableFilters, + (familiesByGuid, familiesBySearchString, familiesSearch, familyFilter) => { const searchedFamilies = familiesBySearchString ? Object.keys(familiesBySearchString).filter( familySearchString => familySearchString.includes(familiesSearch), ).map(familySearchString => familiesBySearchString[familySearchString]) : Object.values(familiesByGuid) return familyFilter ? 
- searchedFamilies.filter(familyFilter(individualsByGuid, user, samplesByFamily, familyTagTypeCounts)) : + searchedFamilies.filter(familyFilter) : searchedFamilies }, ) From 091eea865bf0292aa61025192704941da187916c Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 16 Apr 2024 08:51:18 -0400 Subject: [PATCH 012/736] broad submitter changed --- .../components/panel/variants/Pathogenicity.jsx | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ui/shared/components/panel/variants/Pathogenicity.jsx b/ui/shared/components/panel/variants/Pathogenicity.jsx index a14a6b416e..a149c4782f 100644 --- a/ui/shared/components/panel/variants/Pathogenicity.jsx +++ b/ui/shared/components/panel/variants/Pathogenicity.jsx @@ -75,11 +75,13 @@ SubmissionLabel.propTypes = { submitter: PropTypes.string.isRequired, } -const Submissions = React.memo(({ submissions }) => submissions.map(([submitter, condition]) => ( - } content={condition} /> -))) +const ClinvarSubmissions = React.memo(({ submissions }) => submissions != null && + submissions.map(([submitter, condition]) => ( + submitter === 'Broad Center for Mendelian Genomics, Broad Institute of MIT and Harvard' && + } content={condition} /> + ))) -Submissions.propTypes = { +ClinvarSubmissions.propTypes = { submissions: PropTypes.object.isRequired, } @@ -109,7 +111,6 @@ const clinvarSubmissions = (submitters, conditions) => submitters.map((submitter const Pathogenicity = React.memo(({ variant, showHgmd }) => { const clinvar = variant.clinvar || {} - console.log(clinvar) const pathogenicity = [] if ((clinvar.clinicalSignificance || clinvar.pathogenicity) && (clinvar.variationId || clinvar.alleleId)) { const { pathogenicity: clinvarPathogenicity, assertions, severity } = clinvarSignificance(clinvar) @@ -143,10 +144,10 @@ const Pathogenicity = React.memo(({ variant, showHgmd }) => { { title === 'ClinVar' && ( -
+ - -
+ + ) } From 940b43d6df0a71c3995d837b2bf3c532f260987c Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 16 Apr 2024 13:18:35 -0400 Subject: [PATCH 013/736] fix hover --- .../components/panel/variants/Pathogenicity.jsx | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/ui/shared/components/panel/variants/Pathogenicity.jsx b/ui/shared/components/panel/variants/Pathogenicity.jsx index a149c4782f..a4a66d714b 100644 --- a/ui/shared/components/panel/variants/Pathogenicity.jsx +++ b/ui/shared/components/panel/variants/Pathogenicity.jsx @@ -65,20 +65,10 @@ PathogenicityLink.propTypes = { popup: PropTypes.string, } -const SubmissionLabel = React.memo(({ submitter }) => ( - -)) - -SubmissionLabel.propTypes = { - submitter: PropTypes.string.isRequired, -} - const ClinvarSubmissions = React.memo(({ submissions }) => submissions != null && submissions.map(([submitter, condition]) => ( submitter === 'Broad Center for Mendelian Genomics, Broad Institute of MIT and Harvard' && - } content={condition} /> + } content={condition} /> ))) ClinvarSubmissions.propTypes = { From 7c789a92f72d93f3657316532f4e1fa28f25526c Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 17 Apr 2024 10:55:26 -0400 Subject: [PATCH 014/736] ui updates --- .../panel/variants/Pathogenicity.jsx | 63 +++++++++++-------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/ui/shared/components/panel/variants/Pathogenicity.jsx b/ui/shared/components/panel/variants/Pathogenicity.jsx index a4a66d714b..f84517a640 100644 --- a/ui/shared/components/panel/variants/Pathogenicity.jsx +++ b/ui/shared/components/panel/variants/Pathogenicity.jsx @@ -2,12 +2,12 @@ import React from 'react' import PropTypes from 'prop-types' import { connect } from 'react-redux' import styled from 'styled-components' -import { Label, Icon, Popup } from 'semantic-ui-react' +import { Label, Icon, Popup, List, ListItem } from 'semantic-ui-react' +import { HorizontalSpacer, VerticalSpacer } from 
'shared/components/Spacers' import { getUser, getFamiliesByGuid, getProjectsByGuid } from 'redux/selectors' import { clinvarSignificance, clinvarColor, getPermissionedHgmdClass } from '../../../utils/constants' import { snakecaseToTitlecase } from '../../../utils/stringUtils' -import { HorizontalSpacer } from '../../Spacers' const StarsContainer = styled.span` margin-left: 10px; @@ -27,6 +27,8 @@ const HGMD_CLASS_NAMES = { DP: 'Disease-associated polymorphism (DP)', } +const BROAD_CLINVAR_SUBMITTER = 'Broad Center for Mendelian Genomics, Broad Institute of MIT and Harvard' + const ClinvarStars = React.memo(({ goldStars }) => goldStars != null && ( {Array.from(Array(4).keys()).map(i => (i < goldStars ? : ))} @@ -37,10 +39,11 @@ ClinvarStars.propTypes = { goldStars: PropTypes.number, } -const PathogenicityLabel = React.memo(({ label, color, goldStars }) => ( +const PathogenicityLabel = React.memo(({ label, color, goldStars, submitters }) => ( )) @@ -48,6 +51,7 @@ PathogenicityLabel.propTypes = { label: PropTypes.string.isRequired, color: PropTypes.string, goldStars: PropTypes.number, + submitters: PropTypes.arrayOf(PropTypes.string), } const PathogenicityLink = React.memo(({ href, popup, ...labelProps }) => { @@ -62,17 +66,7 @@ const PathogenicityLink = React.memo(({ href, popup, ...labelProps }) => { PathogenicityLink.propTypes = { href: PropTypes.string.isRequired, - popup: PropTypes.string, -} - -const ClinvarSubmissions = React.memo(({ submissions }) => submissions != null && - submissions.map(([submitter, condition]) => ( - submitter === 'Broad Center for Mendelian Genomics, Broad Institute of MIT and Harvard' && - } content={condition} /> - ))) - -ClinvarSubmissions.propTypes = { - submissions: PropTypes.object.isRequired, + popup: PropTypes.object, } const clinvarUrl = (clinvar) => { @@ -95,9 +89,32 @@ const clinvarLabel = (pathogenicity, assertions, conflictingPathogenicities) => return label } -const clinvarSubmissions = (submitters, conditions) => 
submitters.map((submitter, index) => ( - [submitter, conditions[index]] -)) +const clinvarPopup = (clinvar) => { + const lastUpdated = ( +
{clinvar.version && `Last Updated: ${new Date(clinvar.version).toLocaleDateString()}`}
+ ) + const conditions = clinvar.conditions && ( +
+ Conditions: + + {[...new Set(clinvar.conditions)].map(condition => ( + {condition} + ))} + +
+ ) + return ( +
+ {lastUpdated} + {conditions && ( +
+ + {conditions} +
+ )} +
+ ) +} const Pathogenicity = React.memo(({ variant, showHgmd }) => { const clinvar = variant.clinvar || {} @@ -109,8 +126,8 @@ const Pathogenicity = React.memo(({ variant, showHgmd }) => { color: clinvarColor(severity, 'red', 'orange', 'green'), href: clinvarUrl(clinvar), goldStars: clinvar.goldStars, - popup: clinvar.version && `Last Updated: ${new Date(clinvar.version).toLocaleDateString()}`, - submissions: clinvarSubmissions(clinvar.submitters, clinvar.conditions), + popup: clinvarPopup(clinvar), + submitters: clinvar.submitters, }]) } if (showHgmd) { @@ -132,14 +149,6 @@ const Pathogenicity = React.memo(({ variant, showHgmd }) => { {`${title}:`} - { - title === 'ClinVar' && ( - - - - - ) - } )) }) From a8776be38439883b9abcc62a5b3b9b13b6f64c2b Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 17 Apr 2024 13:35:24 -0400 Subject: [PATCH 015/736] hail search table and tests --- .../SNV_INDEL/annotations.ht/.README.txt.crc | Bin 12 -> 0 bytes .../SNV_INDEL/annotations.ht/._SUCCESS.crc | Bin 8 -> 0 bytes .../annotations.ht/.metadata.json.gz.crc | Bin 16 -> 0 bytes .../GRCh38/SNV_INDEL/annotations.ht/README.txt | 3 --- .../GRCh38/SNV_INDEL/annotations.ht/_SUCCESS | 0 .../annotations.ht/globals/.metadata.json.gz.crc | Bin 16 -> 0 bytes .../annotations.ht/globals/metadata.json.gz | Bin 737 -> 0 bytes .../annotations.ht/globals/parts/.part-0.crc | Bin 24 -> 0 bytes .../annotations.ht/globals/parts/part-0 | Bin 1906 -> 0 bytes .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 139 -> 0 bytes .../metadata.json.gz | Bin 185 -> 0 bytes .../SNV_INDEL/annotations.ht/metadata.json.gz | Bin 985 -> 0 bytes .../annotations.ht/rows/.metadata.json.gz.crc | Bin 20 -> 0 bytes .../annotations.ht/rows/metadata.json.gz | Bin 1344 -> 0 bytes ...rt-0-69dcebe5-50a9-4af1-a543-db0e0db24364.crc | Bin 20 -> 0 bytes .../part-0-69dcebe5-50a9-4af1-a543-db0e0db24364 | Bin 1278 -> 0 bytes 18 files changed, 3 deletions(-) delete mode 100644 
hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/._SUCCESS.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/README.txt delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/_SUCCESS delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/part-0 delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/index delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc 
b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc deleted file mode 100644 index 0568c4d21455718c58816445db0d7bac7456abbc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E^Uky!))6Qu(I diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/._SUCCESS.crc deleted file mode 100644 index 3b7b044936a890cd8d651d349a752d819d71d22c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 PcmYc;N@ieSU}69O2$TUk diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.metadata.json.gz.crc deleted file mode 100644 index 203a84d5d80b59b5c2e40eb1e214cb258698146e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}8`x7h2GCm=FLaPZ^o}VB!X0s%#!btfdqE<x9A{faLdtrRZ+MB`NE=;e#CK=KcOttB(94O~qKQm|Qrlyr z(E6)x!-o~Orrl9%^=mg6X5H*_xMm?Ztd~X2T8*uZ>tqqPMi#KEVXfmzgw^9SAIkN! 
z9<1lbXt{AJ`k$Q$JH1j)s0r0Es*C+L_f>pV4Dill^qd+_5=PQJ4LqZM=X5VnyqMNS zRPMaKP5N^q@>DI*vq{3G$ptDF(XUCl%VYIbE1c4aVufx`t->Gem1^B?HXtW&lV9iA zyXy~`bEOI0@{)NWSy|IZtP=#S4Y=UHG2r&*=Kbwy+E1m~G4+*s*ffx8n%S8T9-<*q zf8ix4d_zu*@#bw}-jpBih$tYdTfvO$kSL8#EQ?7XSpUEUEn9~F3`Z9-4I?%;F$U>O THKMgtCa3!Y`BV_!s0#o9=Q(kr diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc deleted file mode 100644 index 1e49501175bb70c01836d0e0069c0ca5ab490953..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24 gcmYc;N@ieSU}6vzsJngO?505Rlc&SKtvY%J0ATwHd;kCd diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/part-0 deleted file mode 100644 index 15400ca16ae46daeb61b53c4c66fe0e63bf228ea..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1906 zcmV-&2aWh$2LJ$F5C8xuwJ-f(T@QUa0E%Gj5=1bKn*+eWz`(%3z`(%3z#u~t(0O8| zgl$PvS7=irS;`}kkgHKEFk6|UYr#pC0~FwSL}Y`|;tSU%scdww|;ASqv|{b_ZPimfS`fZ5uH z-gWn~GxluZ>{w>mQI~|{41Xy7;KY6H=lAOp3s|MpL;sk->_>**0fMN>gv=w+NHX2X>}vdq<6hJRCgBkzSGFkin`}S#=&7i8#x| zQ{1m-bsiEeharO%l|E?|@dZtV85{ex6ZeNRvZ!p&GNe}|Ww0_;IrbF8u98mbdA9RY zc{6@HJ7d~i>si+*BqD=j_5Nw9Ots-|RbdZp3AZw6S(z%=hNo)9k&65~zaPc^2#pMj z=9ic6sVK_&F=>Z)ZDW%)eY6xxj!G8kowz&4*r&Z>i056CuuDP5g4*)5>lhwq{gF zpiI0VfwJy`1R7avJS6e3;XpZ-8pd&|Z!=0%=i;u)fURQG0Wj)d79$Kmh7e{E!T?~H zNH_zlb-cwY7~A?(ehjY7qR+|U2r#O^DmWEd#l^LGN>9T_nx|)b#maRs+W?(+bhKC8 zxY^&uMzOWS@mmoG6dR4~`hZ5H5)IDNBCan_2Z8M5B4yhKH_EPybMa^&ZgjGdK3H!e zO@y^3(n2y&1_E*V(l`|mh!Y}hbTUjkX-%|o+NU*9i0mWdGSDa_y0lNnrJZbCV_KH6 zK`rx5HyilgR(H>`;8abY-vgOGEp>;xSV$W>9LJ;B?h;DB8H*;7`63ChkT%kBDdFv4 z1F~@^T>{d6O<9H^eF;x*|AZ|gG`!iX7~1)?0sZ`nNK8Wyuikk)?WZ4A3=6!yHcOi6 z(@VZG<4VT1W@L|o*h6_ei~KaY5ZRYnlAT2m2-mX>?G^2%9W5moU{#YGEl*`=JJED8 
z?L0c9SKJh+ZSR!fhWtyb0Gh8HFk)#?7o4ulRKc4Zbmt`oontH7J1HmzfheeRTKjpM%*xxaAY3vq?P7}v|#iemZCh9dQt`tHDhuom0J4UA?21>Xa$&5- zX$+D`4kMCVLZeMcg*LX4*?dzJF?7oi^hS{-L0AMc%f`AFPXY|QSjn`)qLMBagkUoN zYXno`h`kmba6Vp3Ou125D@u<22AV|aykkO1nej+3h3=F%_!kav+9Kr5Fiq~TZ)G>3 zYap6ackZ$oR3{FaaCX<;jfOJF-&3F$IlEaU$z(7H=!%&kv9VFH5M>#h*FlUchUJs^ zZX^jAtV8$s%W`ZO`=fEBl7IV19CXejF(iIHNNyc>Ra#T$gV64gdWPku_YM)M@n(Za zchZ}1D+7hBw6$Np%agIkVb7f-n>Of&dM6o2JZdP7Z}1o@1)={Ap?SHE>d*23BJkD; zS3q5_B-~*wEM7PUAEOGeb{PYbrb%>EBp`|g*-F8qy|~akMuhAitxs104FCWD00000D77#BAOHaX0ONs^zW@LL diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/.index.crc deleted file mode 100644 index e068ffbb9a43ea12e5ad26abd9cc15821e40a53b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Dhg^SlQD5mEyl diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/.metadata.json.gz.crc deleted file mode 100644 index da6df53be05e706231fbb7a59231bccd9f3c1f66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}8vBR5t_w5B35X diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/index deleted file mode 100644 index ea8953b74da9896ae9942c2fff326b5919a782e9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 139 zcmYddU|>iEVvVi(e-%>KGBKF2Ffgz&uq0;`87}f?Vs>~Px2xKHYdcw-Y z7{cg$h4JvQ-9qjsnHxV|&}L$EXLP*H+qV58kQu`0{!f67A)&&cIZeTseUjxl$utQ? 
Upba80TSOKzFfcL#d1#gZ0Oh44XaE2J diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-69dcebe5-50a9-4af1-a543-db0e0db24364.idx/metadata.json.gz deleted file mode 100644 index 5b44f36c6f25e26361bcff6cfbdc5dbf74c1c5ca..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 185 zcmb2|=3sz;(7uCQhYWby?yIV|$}Q_NtQL0U=j!Sb{}_-cB zFW9Z}{qdX}|Ln~xmOELt^G%L9y5duBx6dAa1&2y`55fI^+Iwc+PxQ-CI`B)quCI$n lyv};LA&ZrXuhX>|qB@H@Hum>T$p4l2gDIuPaoPo-ivfGmO=IvmeqN{?lP+(+Lf-i2Hmsu2D zNI{s)P=f!2Hi~lL_pX_Ou3#;dfsCMuKn1T%?E!!9YP20TN!z#o(Rmde5W8{!jr~4^ z;8=k@ViLE<88`cMq7>afG*Q+G)f$vRl>72SfT#^|y))EW-BE2okxJoaBL@$9Q-g!O zSq*^ii38{xc?*8C=)>R0dJV9mRuCW+*VnbE3E`Sl!&e{{9k-Cr@g7k-)DCyyl{D6J>P)N4Z3DOZ1px23#KV80BlU~{1 zKfW%f+mrJT&Mr!#@{=<%FhBc{;E-nTJi9)kO-)NKB%zFFe&`rQv^nx-dMV7w826pE z^Iwk=2p~L|vM0^Lg`fW*eP`YxYb7ptA5>xsX)3G@)(O|dLMv#Ff9Yo_3!oxEd*~5l z#;+euCS-Y>MovXo&T}QTRiA!(*FpP5G?NO_c_FC{`S!UR6oq^cu*2eFIs4LuiKwD! z{bc~r2ItCPO^bj;c-+b!!-NR$a0d>9To&2NP^DY#8%~~)UfD>Wl$Nvm*;OBxzH>d-{kSXLdoV ztz}W)iI6ZSbF#QyTu=V;)lTN!=afvc?`@&DxBwF@WEpBo?p8OQ(;IXgrnvDz+K(Wv ztBu?)$=9pf&yy?fH%DaxvZ{d9&t?BccJ>F9%AfD|tBujAqB!dHu-9A>23R=N zwARGIoc+V;+Qw0`vaA8U2wvFw^sO?kk=U3i_TOL+*(gQuz+RcozkcS|u7`gB(^=&L HUkm^MP5^~w507xSUO8@`> diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz deleted file mode 100644 index e30708a4aeb5d08fafe3f2fb0c5a5f42cbcfffb7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1344 zcmV-G1;6?qiwFP!000000Nt2bZ`(K!fd7l0wph6v$43*r*{+=go5Qe!V6j0U&=MuH zkti)m2XO=a_Z{jaDLEI=mli=GlEb+;nosnLMNy0`$GV_Ce#ZXl?)y5$-mzs{?jzn? 
z&b<{S-j2f$N%jx^%|?H0TdIOxX2?uueErHCX{$6atb3hBni~i>mjDtRjh8qC+=yZ zv>qw0C_6SJ5s=Y*sxndg=?ctZH+QJ?Hy3%Z35N^?E=3{yfvzNpFrv1c3rGaJx;7~e zz|&)$xCi?QF@mV(*E8fd_h>w{Lz=|McOezBNC*w{T!K16g?2_iUG60eJM%@9p!`h0 z9>!D?VmO3jbIYhrFEhxw?`s>D=d1a5MW+`L3La}RMKxWhNem+532=6a#5RG0A|BL> zR&dSsRFT%Aur;#7JNl5lyJ|{-*F2wWNF;31k*vDXO-wy8bd-Bx#KG_qvI<`#h8^ zG!>9RSB-T&$b*i~B?{`c9l`sjS`UTG;q~B$>|b|zvaRiBnMB`Hi9QW_m-7EV&Q`PS z;N(jUwc>pTL3nR8qAB9vz7)hNq}~Ent}*#d|Po;G$4 zP^SwzdLSioI%v}cn{L*;ftnU+UIootoG~!dK^Pq{Ew(6VG038UMdON&6^$yLP-*(P z(!T0$-n6m9z1%>F*2%j;V%RJY@^PCDKDum5cqrWH!iGkS4q$Y!;&oK$q-dIS-I-fC zZo;fjvgAVx4ON@0$Y6q@W$wCdcUze&c(B<80cOUH>2f~pAD>K{CEN10B=QxJDWKpH zoY90fa&k?6g1~q*8jRc9{^i2Yw|;ORM21mp?#$#r2!hBvq7lPIHJ4u*8IEKJgZZyt z|H{SXEcEjrzZkyjwy0t^AdJg){#<-lLmk4Ul*6FoyBt=WUv|^X>?8;{7PTLU(xm8Y zy3s6q==uO?0#U-yn!9xWqhAZPS1q3mZ+?^V1A~r#a`|l7rq+Z+z-|c!Nj1(GfcB4N=hVq0w;A^EXHM$ln|f2jgM>OWuza zq9LBbC!{a;zgU1cCS+j6?|$%5lrUKJwyY2CR3&UV{iFVHoHaiUg8j*C1|ky+&fLP( zf2X3;{;1y{pBTDsLFO#iM5q_-e7r7vgWz#JHV0zx-9L zC)%4JGc{uLI(x5yGmejk@B{4a);p_7Ddm=OOQK8Bp6n*M?$W}ygb=dHmMP5uzW~wz zy?S}hdvBf68x=ih`hF(LcS(v_lcNcza7Zzu?2cq@?N>tFr`q5iiNT$sF$%_iXdcI_#R;&bzf4lNm>D612UO z<<^|!qwN*raqB%|@OxjCvba#Y^9ios*yVIbyLYMa6qQs|g49I2&3D{#(qqX>z3~BS z?cO&l4bhZ^hS*iKDGI0i|K7#-T7JL0fO21~)VTF0fbELghkbqBzYw?Z$BfYb?joV) zOu*npTbQeooimlU96KgCd8D)5-G*7` zoin~)UO`N7(0KeR$i#(Cive6;L$izmo+|{8D=)shTS$2i+PNd)ZAh$+xxyEP>*M0X zH{)QDy?JBAhJRCR$cO304aJp`@JSk<+7(kw4~^fg0TJg^wm#sP8FMV?$pmwjwgBpf zwEl#09`QkjGp&ZSObSstguAc2n)7`X`tZYwBkWNl7_(u?@;EtYW88jXmDq32iJ`f^7u?(Oj*avI z@Oxh>QZioYs)wF2>jtS12A52}I#3=f}tMyz;L!t{GH$|k8q_b*l6okM&UkX5Hdq+*`jLD*}0f3pP;Ur>+DKsP^ zqA*Dc>7W7;nWCZx5Va3DM=Z|0Sy5`2dvla8G!!J@Jfo|f&VEs zMsm6Vmr3uz^CgtS`D2ymeBWSk_0_;JXcipSixI{yI0~tY>>);KW_9nUa>l{}LPx9X z7gzSjbfbVB{X4Sb8FghBRN_iaj|ayDDvXGiAb1FwI?l#y6&RzFvIaR32Ng;dOa>dl zgD`jKTqujTe2$9bPZcO{Ia8PK8-C10q11wDRX0F$W$ZZnw@1)YHedN#a7%dmsZcIm zq>FbriMpwYkSx?GxjcczP6E0>l?!&)tkPF0X{FKBP06W4VaKO*bp}fIy5-a(@qbgQ zW=#FQ2H=Oqa{HpS0!>?ZpLCY2C{HT9AAG0ind~hMCK6k^ry9EnsFoC-1AS-2+g%^J 
oMJD?ROf(I>F)apR%reyBl~YU=4FCWD00000D77#BAOHaX0L*T5#sB~S From b1a3c3561e26f433ad4bc5a48a0abd1039929224 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 17 Apr 2024 13:35:39 -0400 Subject: [PATCH 016/736] hail search table and tests --- .../SNV_INDEL/annotations.ht/.README.txt.crc | Bin 0 -> 12 bytes .../SNV_INDEL/annotations.ht/._SUCCESS.crc | Bin 0 -> 8 bytes .../annotations.ht/.metadata.json.gz.crc | Bin 0 -> 16 bytes .../SNV_INDEL/annotations.ht/README.txt | 3 ++ .../GRCh38/SNV_INDEL/annotations.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin 0 -> 16 bytes .../annotations.ht/globals/metadata.json.gz | Bin 0 -> 737 bytes .../annotations.ht/globals/parts/.part-0.crc | Bin 0 -> 24 bytes .../annotations.ht/globals/parts/part-0 | Bin 0 -> 1906 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 139 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../SNV_INDEL/annotations.ht/metadata.json.gz | Bin 0 -> 1000 bytes .../annotations.ht/rows/.metadata.json.gz.crc | Bin 0 -> 20 bytes .../annotations.ht/rows/metadata.json.gz | Bin 0 -> 1371 bytes ...0-1431d7f7-8f3e-4e29-878c-69a273b63db2.crc | Bin 0 -> 20 bytes ...art-0-1431d7f7-8f3e-4e29-878c-69a273b63db2 | Bin 0 -> 1508 bytes hail_search/test_search.py | 11 +++++- hail_search/test_utils.py | 36 +++++++++++++++++- 20 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/._SUCCESS.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/README.txt create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/_SUCCESS create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/.metadata.json.gz.crc create mode 100644 
hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/part-0 create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc new file mode 100644 index 0000000000000000000000000000000000000000..c130205bef2210481f02941663d7bd5a21b425af GIT binary patch literal 12 TcmYc;N@ieSU}CVz-xdJ?5rG2q literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 
PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..44cd4176edb8e2ccb7867e2103bdcf75ca28e819 GIT binary patch literal 16 XcmYc;N@ieSU}9kJV5m=FLaPZ^o}VB!X0s%#!btfdqE<x9A{faLdtrRZ+MB`NE=;e#CK=KcOttB(94O~qKQm|Qrlyr z(E6)x!-o~Orrl9%^=mg6X5H*_xMm?Ztd~X2T8*uZ>tqqPMi#KEVXfmzgw^9SAIkN! z9<1lbXt{AJ`k$Q$JH1j)s0r0Es*C+L_f>pV4Dill^qd+_5=PQJ4LqZM=X5VnyqMNS zRPMaKP5N^q@>DI*vq{3G$ptDF(XUCl%VYIbE1c4aVufx`t->Gem1^B?HXtW&lV9iA zyXy~`bEOI0@{)NWSy|IZtP=#S4Y=UHG2r&*=Kbwy+E1m~G4+*s*ffx8n%S8T9-<*q zf8ix4d_zu*@#bw}-jpBih$tYdTfvO$kSL8#EQ?7XSpUEUEn9~F3`Z9-4I?%;F$U>O THKMgtCa3!Y`BV_!s0#o9=Q(kr literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc new file mode 100644 index 0000000000000000000000000000000000000000..1e49501175bb70c01836d0e0069c0ca5ab490953 GIT binary patch literal 24 gcmYc;N@ieSU}6vzsJngO?505Rlc&SKtvY%J0ATwHd;kCd literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/part-0 new file mode 100644 index 0000000000000000000000000000000000000000..15400ca16ae46daeb61b53c4c66fe0e63bf228ea GIT binary patch literal 1906 zcmV-&2aWh$2LJ$F5C8xuwJ-f(T@QUa0E%Gj5=1bKn*+eWz`(%3z`(%3z#u~t(0O8| zgl$PvS7=irS;`}kkgHKEFk6|UYr#pC0~FwSL}Y`|;tSU%scdww|;ASqv|{b_ZPimfS`fZ5uH z-gWn~GxluZ>{w>mQI~|{41Xy7;KY6H=lAOp3s|MpL;sk->_>**0fMN>gv=w+NHX2X>}vdq<6hJRCgBkzSGFkin`}S#=&7i8#x| zQ{1m-bsiEeharO%l|E?|@dZtV85{ex6ZeNRvZ!p&GNe}|Ww0_;IrbF8u98mbdA9RY zc{6@HJ7d~i>si+*BqD=j_5Nw9Ots-|RbdZp3AZw6S(z%=hNo)9k&65~zaPc^2#pMj z=9ic6sVK_&F=>Z)ZDW%)eY6xxj!G8kowz&4*r&Z>i056CuuDP5g4*)5>lhwq{gF 
zpiI0VfwJy`1R7avJS6e3;XpZ-8pd&|Z!=0%=i;u)fURQG0Wj)d79$Kmh7e{E!T?~H zNH_zlb-cwY7~A?(ehjY7qR+|U2r#O^DmWEd#l^LGN>9T_nx|)b#maRs+W?(+bhKC8 zxY^&uMzOWS@mmoG6dR4~`hZ5H5)IDNBCan_2Z8M5B4yhKH_EPybMa^&ZgjGdK3H!e zO@y^3(n2y&1_E*V(l`|mh!Y}hbTUjkX-%|o+NU*9i0mWdGSDa_y0lNnrJZbCV_KH6 zK`rx5HyilgR(H>`;8abY-vgOGEp>;xSV$W>9LJ;B?h;DB8H*;7`63ChkT%kBDdFv4 z1F~@^T>{d6O<9H^eF;x*|AZ|gG`!iX7~1)?0sZ`nNK8Wyuikk)?WZ4A3=6!yHcOi6 z(@VZG<4VT1W@L|o*h6_ei~KaY5ZRYnlAT2m2-mX>?G^2%9W5moU{#YGEl*`=JJED8 z?L0c9SKJh+ZSR!fhWtyb0Gh8HFk)#?7o4ulRKc4Zbmt`oontH7J1HmzfheeRTKjpM%*xxaAY3vq?P7}v|#iemZCh9dQt`tHDhuom0J4UA?21>Xa$&5- zX$+D`4kMCVLZeMcg*LX4*?dzJF?7oi^hS{-L0AMc%f`AFPXY|QSjn`)qLMBagkUoN zYXno`h`kmba6Vp3Ou125D@u<22AV|aykkO1nej+3h3=F%_!kav+9Kr5Fiq~TZ)G>3 zYap6ackZ$oR3{FaaCX<;jfOJF-&3F$IlEaU$z(7H=!%&kv9VFH5M>#h*FlUchUJs^ zZX^jAtV8$s%W`ZO`=fEBl7IV19CXejF(iIHNNyc>Ra#T$gV64gdWPku_YM)M@n(Za zchZ}1D+7hBw6$Np%agIkVb7f-n>Of&dM6o2JZdP7Z}1o@1)={Ap?SHE>d*23BJkD; zS3q5_B-~*wEM7PUAEOGeb{PYbrb%>EBp`|g*-F8qy|~akMuhAitxs104FCWD00000D77#BAOHaX0ONs^zW@LL literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..2dd0312d7030509569218006d0090c18c5f6710d GIT binary patch literal 12 TcmYc;N@ieSU}D(Mc5FTX61f9q literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..da6df53be05e706231fbb7a59231bccd9f3c1f66 GIT binary patch literal 12 TcmYc;N@ieSU}8vBR5t_w5B35X literal 0 HcmV?d00001 diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..1ddc2ce5c2a9114da078191961da002f489cea02 GIT binary patch literal 139 zcmYddU|>iEVvVi(e-%>KGBKF2Ffgz&uq0;`87}f?Vs>~Px2xKHadcw-Y z7{cg$mGSVg-9qlCnHxV|&}L$EXLP*7-?sfCkQu`0-XP4zkWgXJoTgyRKFRW&WSWE` T&;}8hEg}mU7#JCWJTyxH%|s&f literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..5b44f36c6f25e26361bcff6cfbdc5dbf74c1c5ca GIT binary patch literal 185 zcmb2|=3sz;(7uCQhYWby?yIV|$}Q_NtQL0U=j!Sb{}_-cB zFW9Z}{qdX}|Ln~xmOELt^G%L9y5duBx6dAa1&2y`55fI^+Iwc+PxQ-CI`B)quCI$n lyv};LA&ZrXuhX>|qB@H@Hum>T$p4l2gDIuPaoPo-ivfGmO=VEZ z&b3`eFY154$4Q(x&e+iXJPAS0`|)$^$L1y4@*Kz$D9xpalJkqR(PbR%D9<}(bn;>J z;dFHQ9)PBorza=$l$}O#q+kmQ1OrVJ7A5Oi;{|q4D0doN=A8KoBg+EX=5|?;2HAxa zgvk^&_(y1?C>QP8HB-_DxL__ z{eG-A!G_CWaO`;!#J>p4x!e+@KePlzkH`2s9FoUevb~OxjvxoUj+Ol3>Q$CxZh!yv zWjWcMUA}XEK@pXmU66tM#k&{}Y5K;!+h(+-X~Bggl<~AZbc`a}9CrR{V zUmYb7K)83xo;34?_WXP0JNE`zt8vNupc30i-C^ahj=3flT0t}XNj^!L0~H?H!w9c3 zetoYiAP^i%O$lbU*C6wrjQRjc3503r=Pnt0aeta zzxE*7;<+-|)50SW9*(ldFh0V2T!F(NlXIy4Lv;0iQ*eX8hkFzIOu4D< zieJ*$a{4fx_mPYrrmOppfez_zffP5?h9uavy}F*=`~EoBh0G7-4#bFDJ&c!uaf_=j z3Uc#x?uOp82YfoU0x5TvciY+d6g!@g#ogj&{CC^!c-DPR$TXC%7*CLeZz;Hw zZo8mY2;47m+a_r~yu7YAa9>h&oJ(m*-o;5pHYTvYu`@Ya|w@?S1dC`<7U>w za<{{}J68_x6MPCYS(H+MFgi+JqWTqFeF|FG*YWb<^TfHVN?_FkyM0wV|08PUkGIEF z&KOiuB!`L~Dt7S`ESzalS;Az_|M7fn^C($bo`GJ5R@ub#y;81`3Yp0dU*Q00lpuIu WFIg8~{bNvt9{&TyOstEM3;+OroAz)3 literal 0 HcmV?d00001 diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..27a35cf2f3efd9e7fdd9adf789d4d98fe2042c3d GIT binary patch literal 20 bcmYc;N@ieSU}DJM{e1h9kTnVcvdnJ)LZ=5K literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..8328e83fe8ab79a189ab8bf9f8d793060af18080 GIT binary patch literal 1371 zcmV-h1*G~PiwFP!000000Nt2dZ{j!W>k%0``mrvFXqLfD8iOwT~Z%EV}Et`eVt&hXW5qfh!2)? zZ-ue9=P*WLh(pY+2UzlAAA3^Ja$ZUcw}%A#(`o^vu$+RGKW7uf@mjFGCtfmhjymJT zRFJI$QiLLQu#=b*L1GFsmsIq7sa?2sW@E_Fyp+bR6(H8!|Rvd>IQCrRhBm}KCB_#lO zdaMffU_T*75Y^&(hWcg^4NmMWjU(i{Pzu>3gl@B1f;mEkwnsl*?!^o{vw0Y!Y)`-+ z##9qxcnjO+7FnB7rciU=*A$i)tJ!zOrWXPCm`A-61y07ig?g4 zTF2Gui6V2*He(Mt)|J}Ep}!W0@lxQJBH|n~*`IAY!S|bp2-qg25sU7Jxc#iJ8%wF} zZzvBEtv0LK-E3i0-M*WxZ%@q0tG!qa4T((UjMd~cpm-Aaq1G-@uhlgZ` zY0tg58?VgO0O}D%?$zImvR8D0)W&3Mbi_=~xXBKDE8C2pk*A4`=kD@mc{Tp0j($8> zo~Q2E|4~K`qYEt5Q5Vc>GznLz*MQUb zR;-qID~37UltEGsbhZ_f3ff*FTOMI6*N$LK(=egdg0v!>jA-uwn!z#?Wk)Ec230Uj z!&ie|Cb!~BYp;WLNM$9>ko zY4a&@qmZTzG#X=C5Ys}J*WseGr9RfHv%FE`I?ZY)3!*e&QXZ@&OH7nB++8>Q?n-w# zK{op!z|6RsEoPH$Z#1cwY|Gn`&{sqz0FOrqMibh|$u;>20)x}j{-CM$FBg8M`awO& z45P@@OyxgFg2>CLk;7#TiUUy^=O>%)G)*7cJ{jsnlr%IP$nZb~7gnHrL$Ll;c dNFG;nGwDRmq%s{O1e3nf^nW7C1yMs4001nup~nCK literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.crc new file mode 100644 index 0000000000000000000000000000000000000000..6ab56959aad8424f649fe998d4a25f9b3e147a84 GIT binary patch literal 20 ccmYc;N@ieSU}E?_fidRy(U(0XXTP@q07!TV5C8xG literal 
0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2 b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2 new file mode 100644 index 0000000000000000000000000000000000000000..50a0d0e31f8917f031170866e72ac1229bd1c6e0 GIT binary patch literal 1508 zcmVbv8J#h*tNP+bE4lz@C7rDnM&XiRht2(>9p`1Of8^ z5CMOC{Y3vsYE~gON&jis2smwm1}m_HLt1LIMP`;aHzhX@zO4P@#3oN5IPb_cs$A{s z$ZeXdy{BnP^z=YH996MT)G1W)Zq22 zximN=2M(H%2CKk;3Db4-OJ;kjg78S9&u;v)OuWprM1VN$A|RSfkXFc$4W?@IseHv3|(9h#L&fN<@I;{e~Q*)tJN`xa|2JLX+;WW zu>Aj*-E7S9`hQ(pLnqFeJHD?wqMdI(@}%dd$<6gl&7W+uaK^Q(-`A~y7S6Z~rVEv< zybin`yo=#1QTBxpWq6L^SKQ@%vK8*bqSr=xNa=ObhaYMhDBe9{npM1f`5GQ0Y0_{b z*SPZD%TzT-9sudws=3v3t49Q0J6O;2L^&B#adJJ43=DYC>1ADYO`%9#PkBd+4A>rY zLVJmpXzwad&9z)RKTlFus=T93>RF~~Ii#-JT>I|4qv49GdsWxHyIJ;HnU>&Q*YZqJ zwN%NyYSScBG$l>&ZLgnlJ$I*FtcN4Rs5rUP^hC`ITUu8MUe;ZA=cJAbB~SH4Q}e*y zJ571+6)}}_MHFdw>Z(Tc|2A%NzE%E{kA})@iNQ5aKou@&0a$*z5Pa?ACRf+2*XTh$ zeIj&OGTXzNP`auOQt{!lcf5E!JA>EZpEXuM58d84$13u^`l#uuUa81v{V zgTNeY*c?bzteLYU;_a8dzMaw{*}|kC9o(>NCiL)F&HnAdwnsaXfdvG0Wm{B*n-o@1UI28;Eq1c*{USjQWa;VqF&_RTrj z_m!(4=3-t6v0e|$!A{PN$+-#QF3yQuSXp8Sp9s%lGjIOISfbQV$1CBBVVoG}0{;-A z*m7Oqhb9w|Z#6(c`Cl)DkS;Dm1HfT>!{6B~#wX`2cXEs_i z9E{xF$(Pob-M0IpN(#(}F08Of;f}bGmub0AzK%RV4T|rRb2MGM0+5NogHI+$%dS9g zPNF5+&64T$lgr}vsz#msBv-RkO9g1(>AJ&?_Oy4C|74^nJ4SHeG#DjDPl_Y7*C7ie zfS#x!AyP;wBQzosG)W5SpaPMZqM8R1Amu;l|hg;|GDEXpEz{kiezTiwwOEBZaeAXOE6mFhG4jVP`lW{QeCAHYNPTNf#qBPR?b0 z+owUvQ9VDrStC=Xv>o*!y zGIkDdc3gg<7u2Y2vcEQCO*lZad+uI@zty;0=y|0fSzO0r66esm9O%MT5@g~SQWt>g zO2d6VB@2CMH3ub>M^GZaJCu*pg-dR9p<*i_8EjhYvyWa_f~kw6yw`_uI0c z1StL9H!e_?M5k&37pTZ%{V?(#>g)H(WShYLB=I+($Ip@HCOW_TW~vPU00000001bp KFa00@0RR9*+SwZb literal 0 HcmV?d00001 diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 8890d40ab1..2a052d2127 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -33,7 +33,16 @@ } }, 
'genotypeFilters': '', - 'clinvar': None, + 'clinvar': { + 'alleleId': None, + 'conflictingPathogenicities': None, + 'goldStars': None, + 'pathogenicity': None, + 'assertions': None, + 'submitters': None, + 'conditions': None, + 'version': '2024-02-21', + }, 'hgmd': None, 'screenRegionType': None, 'populations': { diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index 515f2f4d62..7ee894af36 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -109,6 +109,8 @@ 'goldStars': None, 'pathogenicity': 'Likely_pathogenic', 'assertions': None, + 'submitters': None, + 'conditions': None, 'version': '2024-02-21', }, 'hgmd': None, @@ -181,6 +183,18 @@ 'pathogenicity': 'Conflicting_classifications_of_pathogenicity', 'assertions': ['other'], 'version': '2024-02-21', + 'submitters': [ + 'Broad Center for Mendelian Genomics, Broad Institute of MIT and Harvard', + 'Illumina Laboratory Services, Illumina', + 'Blueprint Genetics', + 'GenomeConnect, ClinGen' + ], + 'conditions': [ + 'ABCA4-Related Disorders', + 'Severe early-childhood-onset retinal dystrophy', + 'not specified', + 'not provided' + ], }, 'hgmd': {'accession': 'CM981315', 'class': 'DFP'}, 'screenRegionType': None, @@ -256,7 +270,16 @@ }, }, 'genotypeFilters': '', - 'clinvar': None, + 'clinvar': { + 'alleleId': None, + 'conflictingPathogenicities': None, + 'goldStars': None, + 'submitters': None, + 'conditions': None, + 'pathogenicity': None, + 'assertions': None, + 'version': '2024-02-21', + }, 'hgmd': None, 'screenRegionType': None, 'populations': { @@ -324,7 +347,16 @@ }, }, 'genotypeFilters': 'VQSRTrancheSNP99.95to100.00', - 'clinvar': None, + 'clinvar': { + 'alleleId': None, + 'conflictingPathogenicities': None, + 'goldStars': None, + 'submitters': None, + 'conditions': None, + 'pathogenicity': None, + 'assertions': None, + 'version': '2024-02-21', + }, 'hgmd': None, 'screenRegionType': 'CTCF-only', 'populations': { From 7837921508a356320d05f6aef6039f2bc332da95 Mon Sep 17 
00:00:00 2001 From: Hana Snow Date: Wed, 17 Apr 2024 15:26:18 -0400 Subject: [PATCH 017/736] move to shared file --- .../FamilyTable/header/TableHeaderRow.jsx | 4 +- ui/pages/Project/constants.js | 98 ++----------------- ui/pages/Project/selectors.js | 75 +------------- ui/redux/selectors.js | 77 ++++++++++++++- ui/shared/utils/constants.js | 80 +++++++++++++++ 5 files changed, 171 insertions(+), 163 deletions(-) diff --git a/ui/pages/Project/components/FamilyTable/header/TableHeaderRow.jsx b/ui/pages/Project/components/FamilyTable/header/TableHeaderRow.jsx index 8952ac73cb..80cbc38a4f 100644 --- a/ui/pages/Project/components/FamilyTable/header/TableHeaderRow.jsx +++ b/ui/pages/Project/components/FamilyTable/header/TableHeaderRow.jsx @@ -16,7 +16,7 @@ import { } from '../../../selectors' import { updateFamiliesTable, updateFamiliesTableFilters } from '../../../reducers' import { - CATEGORY_FAMILY_FILTERS, + PROJECT_CATEGORY_FAMILY_FILTERS, CASE_REVIEW_FAMILY_FILTER_OPTIONS, FAMILY_SORT_OPTIONS, CASE_REVIEW_TABLE_NAME, @@ -151,7 +151,7 @@ const FamilyTableFilter = connect(mapFilterStateToProps, mapFilterDispatchToProp const familyFieldDisplay = (field) => { const { id } = field - return CATEGORY_FAMILY_FILTERS[id] ? : FAMILY_FIELD_NAME_LOOKUP[id] + return PROJECT_CATEGORY_FAMILY_FILTERS[id] ? 
: FAMILY_FIELD_NAME_LOOKUP[id] } const TableHeaderRow = React.memo(({ diff --git a/ui/pages/Project/constants.js b/ui/pages/Project/constants.js index bbb8c063cc..c4ef5a6bd1 100644 --- a/ui/pages/Project/constants.js +++ b/ui/pages/Project/constants.js @@ -2,7 +2,8 @@ import { stripMarkdown } from 'shared/utils/stringUtils' import { - SELECTABLE_FAMILY_ANALYSIS_STATUS_OPTIONS, + CATEGORY_FAMILY_FILTERS, + ASSIGNED_TO_ME_FILTER, FAMILY_FIELD_ID, FAMILY_DISPLAY_NAME, FAMILY_FIELD_DESCRIPTION, @@ -35,12 +36,7 @@ import { INDIVIDUAL_HPO_EXPORT_DATA, FAMILY_NOTES_FIELDS, SNP_DATA_TYPE, - FAMILY_ANALYSED_BY_DATA_TYPES, MME_TAG_NAME, - SAMPLE_TYPE_RNA, - DATASET_TYPE_SV_CALLS, - DATASET_TYPE_MITO_CALLS, - DATASET_TITLE_LOOKUP, } from 'shared/utils/constants' export const CASE_REVIEW_TABLE_NAME = 'Case Review' @@ -243,15 +239,10 @@ export const INDIVIDUAL_DETAIL_FIELDS = [ export const SHOW_IN_REVIEW = 'IN_REVIEW' const SHOW_ACCEPTED = 'ACCEPTED' -const SHOW_DATA_LOADED = 'SHOW_DATA_LOADED' const SHOW_PHENOTYPES_ENTERED = 'SHOW_PHENOTYPES_ENTERED' const SHOW_NO_PHENOTYPES_ENTERED = 'SHOW_NO_PHENOTYPES_ENTERED' const SHOW_ASSIGNED_TO_ME_IN_REVIEW = 'SHOW_ASSIGNED_TO_ME_IN_REVIEW' -const SHOW_ASSIGNED_TO_ME = 'SHOW_ASSIGNED_TO_ME' -const SHOW_ANALYSED_BY_ME = 'SHOW_ANALYSED_BY_ME' -const SHOW_ANALYSED = 'SHOW_ANALYSED' -const SHOW_NOT_ANALYSED = 'SHOW_NOT_ANALYSED' const getFamilyCaseReviewStatuses = (family, individualsByGuid) => { const statuses = family.individualGuids.map( @@ -268,9 +259,6 @@ const familyIsInReview = (family, individualsByGuid) => getFamilyCaseReviewStatu status => status === CASE_REVIEW_STATUS_IN_REVIEW, ) -const familyIsAssignedToMe = (family, user) => ( - family.assignedAnalyst ? family.assignedAnalyst.email === user.email : null) - const REQUIRED_METADATA_FIELDS = INDIVIDUAL_DETAIL_FIELDS.filter( ({ isRequiredInternal }) => isRequiredInternal, ).map(({ field, subFields }) => (subFields ? 
subFields[0].field : field)) @@ -295,76 +283,17 @@ const ACCEPTED_FILTER = { name: 'Accepted', createFilter: caseReviewStatusFilter(CASE_REVIEW_STATUS_ACCEPTED), } -const ASSIGNED_TO_ME_FILTER = { - value: SHOW_ASSIGNED_TO_ME, - name: 'Assigned To Me', - createFilter: (family, individualsByGuid, user) => familyIsAssignedToMe(family, user), -} -const ANALYST_HIGH_PRIORITY_TAG = 'Analyst high priority' -const hasMatchingSampleFilter = isMatchingSample => (family, individualsByGuid, user, samplesByFamily) => ( - (samplesByFamily[family.familyGuid] || []).some(sample => sample.isActive && isMatchingSample(sample))) +const ANALYST_HIGH_PRIORITY_TAG = 'Analyst high priority' -export const CATEGORY_FAMILY_FILTERS = { +export const PROJECT_CATEGORY_FAMILY_FILTERS = { + ...CATEGORY_FAMILY_FILTERS, [FAMILY_FIELD_ANALYSIS_STATUS]: [ - ...SELECTABLE_FAMILY_ANALYSIS_STATUS_OPTIONS.map(option => ({ - ...option, - createFilter: family => family.analysisStatus === option.value, - })), + ...CATEGORY_FAMILY_FILTERS[FAMILY_FIELD_ANALYSIS_STATUS], ...[ACCEPTED_FILTER, IN_REVIEW_FAMILIES_FILTER].map(filter => ({ ...filter, category: 'Case Review Status' })), ], - [FAMILY_FIELD_ANALYSED_BY]: [ - ASSIGNED_TO_ME_FILTER, - { - value: SHOW_ANALYSED_BY_ME, - name: 'Analysed By Me', - analysedByFilter: ({ createdBy }, user) => createdBy === (user.displayName || user.email), - }, - { - value: SHOW_ANALYSED, - name: 'Analysed', - analysedByFilter: () => true, - }, - { - value: SHOW_NOT_ANALYSED, - name: 'Not Analysed', - requireNoAnalysedBy: true, - analysedByFilter: () => true, - }, - ...FAMILY_ANALYSED_BY_DATA_TYPES.map(([type, typeDisplay]) => ({ - value: type, - name: typeDisplay, - category: 'Data Type', - analysedByFilter: ({ dataType }) => dataType === type, - })), - { - value: 'yearSinceAnalysed', - name: '>1 Year', - category: 'Analysis Date', - requireNoAnalysedBy: true, - analysedByFilter: ({ lastModifiedDate }) => ( - (new Date()).setFullYear(new Date().getFullYear() - 1) < new 
Date(lastModifiedDate) - ), - }, - ], [FAMILY_FIELD_FIRST_SAMPLE]: [ - { - value: SHOW_DATA_LOADED, - name: 'Data Loaded', - createFilter: hasMatchingSampleFilter(() => true), - }, - { - value: `${SHOW_DATA_LOADED}_RNA`, - name: 'Data Loaded - RNA', - createFilter: hasMatchingSampleFilter(({ sampleType }) => sampleType === SAMPLE_TYPE_RNA), - }, - ...[DATASET_TYPE_SV_CALLS, DATASET_TYPE_MITO_CALLS].map(dataType => ({ - value: `${SHOW_DATA_LOADED}_${dataType}`, - name: `Data Loaded -${DATASET_TITLE_LOOKUP[dataType]}`, - createFilter: hasMatchingSampleFilter( - ({ sampleType, datasetType }) => sampleType !== SAMPLE_TYPE_RNA && datasetType === dataType, - ), - })), + ...CATEGORY_FAMILY_FILTERS[FAMILY_FIELD_FIRST_SAMPLE], { value: SHOW_PHENOTYPES_ENTERED, name: 'Required Metadata Entered', @@ -382,22 +311,13 @@ export const CATEGORY_FAMILY_FILTERS = { })), } -export const FAMILY_FILTER_LOOKUP = Object.values(CATEGORY_FAMILY_FILTERS).reduce( - (acc, options) => { - options.forEach((opt) => { - acc[opt.value] = opt.createFilter - }) - return acc - }, {}, -) - export const CASE_REVIEW_FAMILY_FILTER_OPTIONS = [ ALL_FAMILIES_FILTER, { value: SHOW_ASSIGNED_TO_ME_IN_REVIEW, name: 'Assigned To Me - In Review', - createFilter: (family, individualsByGuid, user) => familyIsAssignedToMe( - family, user, + createFilter: (family, individualsByGuid, user) => ASSIGNED_TO_ME_FILTER.createFilter( + family, individualsByGuid, user, ) && familyIsInReview(family, individualsByGuid), }, { ...ASSIGNED_TO_ME_FILTER, name: 'Assigned To Me - All' }, diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index f7274a0534..6445bbbb24 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -22,21 +22,20 @@ import { getMmeResultsByGuid, getMmeSubmissionsByGuid, getHasActiveSearchableSampleByFamily, getSelectableTagTypesByProject, getVariantTagsByGuid, getUserOptionsByUsername, getSamplesByFamily, getNotesByFamilyType, getVariantTagNotesByFamilyVariants, 
getPhenotypeGeneScoresByIndividual, - getRnaSeqDataByIndividual, + getRnaSeqDataByIndividual, familyPassesFilters, } from 'redux/selectors' import { SORT_BY_FAMILY_NAME, CASE_REVIEW_STATUS_OPTIONS, CASE_REVIEW_FILTER_LOOKUP, - FAMILY_FILTER_LOOKUP, FAMILY_SORT_OPTIONS, FAMILY_EXPORT_DATA, CASE_REVIEW_FAMILY_EXPORT_DATA, CASE_REVIEW_TABLE_NAME, CASE_REVIEW_INDIVIDUAL_EXPORT_DATA, SAMPLE_EXPORT_DATA, - CATEGORY_FAMILY_FILTERS, + PROJECT_CATEGORY_FAMILY_FILTERS, } from './constants' const FAMILY_SORT_LOOKUP = FAMILY_SORT_OPTIONS.reduce( @@ -361,86 +360,20 @@ const getFamilyAnalysers = createSelector( export const getFamiliesFilterOptionsByCategory = createSelector( getFamilyAnalysers, analysedByOptions => ({ - ...CATEGORY_FAMILY_FILTERS, + ...PROJECT_CATEGORY_FAMILY_FILTERS, [FAMILY_FIELD_ANALYSED_BY]: [ - ...CATEGORY_FAMILY_FILTERS[FAMILY_FIELD_ANALYSED_BY], + ...PROJECT_CATEGORY_FAMILY_FILTERS[FAMILY_FIELD_ANALYSED_BY], ...[...analysedByOptions].map(analysedBy => ({ value: analysedBy, category: 'Analysed By' })), ], }), ) -const ANALYSED_BY_FILTER_LOOKUP = Object.values(CATEGORY_FAMILY_FILTERS).reduce( - (acc, options) => { - options.forEach((opt) => { - acc[opt.value] = opt.analysedByFilter - }) - return acc - }, {}, -) - -const NO_ANALYSED_BY_FIELDS = Object.values(CATEGORY_FAMILY_FILTERS).reduce( - (acc, options) => { - options.filter(opt => opt.requireNoAnalysedBy).forEach((opt) => { - acc.add(opt.value) - }) - return acc - }, new Set(), -) - -const ANALYSED_BY_CATEGORY_OPTION_LOOKUP = CATEGORY_FAMILY_FILTERS[FAMILY_FIELD_ANALYSED_BY].reduce( - (acc, { value, category }) => ({ ...acc, [value]: category || 'Analysed By' }), {}, -) - -const isAnalysedBy = (family, analysedByFilter, user, analysedByOptions) => { - let requireNoAnalysedBy = false - const analsedByGroups = Object.values(analysedByFilter.reduce( - (acc, val) => { - const optFilter = analysedByOptions?.has(val) ? 
({ createdBy }) => createdBy === val : - ANALYSED_BY_FILTER_LOOKUP[val] - if (optFilter) { - const category = ANALYSED_BY_CATEGORY_OPTION_LOOKUP[val] - if (!acc[category]) { - acc[category] = [] - } - acc[category].push(optFilter) - } - if (NO_ANALYSED_BY_FIELDS.has(val)) { - requireNoAnalysedBy = true - } - return acc - }, {}, - )) - if (!analsedByGroups.length) { - return true - } - const filteredAnalysedBy = analsedByGroups.reduce( - (acc, filterGroup) => acc.filter(analysedBy => filterGroup.some(f => f(analysedBy, user))), - family.analysedBy, - ) - return requireNoAnalysedBy ? filteredAnalysedBy.length === 0 : filteredAnalysedBy.length > 0 -} - export const getFamiliesTableFilters = createSelector( getFamiliesTableFiltersByProject, getProjectGuid, (familyTableFiltersByProject, projectGuid) => (familyTableFiltersByProject || {})[projectGuid], ) -const familyPassesFilters = createSelector( - getIndividualsByGuid, - getUser, - getSamplesByFamily, - (individualsByGuid, user, samplesByFamily) => (family, groupedFilters, analysedByOptions) => { - if (groupedFilters.analysedBy && !isAnalysedBy(family, groupedFilters.analysedBy, user, analysedByOptions)) { - return false - } - return Object.values(groupedFilters).every((groupVals) => { - const filters = (groupVals || []).map(val => FAMILY_FILTER_LOOKUP[val]).filter(val => val) - return !filters.length || filters.some(filter => filter(family, individualsByGuid, user, samplesByFamily)) - }) - }, -) - const familyPassesTableFilters = createSelector( (state, ownProps) => ownProps?.tableName === CASE_REVIEW_TABLE_NAME, state => state.caseReviewTableState.familiesFilter, diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index d1e82bc453..68237965ab 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -3,7 +3,7 @@ import uniqWith from 'lodash/uniqWith' import { compHetGene } from 'shared/components/panel/variants/VariantUtils' import { compareObjects } from 'shared/utils/sortUtils' -import { 
NOTE_TAG_NAME, MME_TAG_NAME } from 'shared/utils/constants' +import { NOTE_TAG_NAME, MME_TAG_NAME, FAMILY_FIELD_ANALYSED_BY, CATEGORY_FAMILY_FILTERS } from 'shared/utils/constants' export const getProjectsIsLoading = state => state.projectsLoading.isLoading export const getProjectDetailsIsLoading = state => state.projectDetailsLoading.isLoading @@ -453,3 +453,78 @@ export const getSpliceOutliersByChromFamily = createSelector( (acc, spliceData) => (groupDataNestedByChrom(acc, spliceData, spliceData[0].familyGuid)), {}, ), ) + +const FAMILY_FILTER_LOOKUP = Object.values(CATEGORY_FAMILY_FILTERS).reduce( + (acc, options) => { + options.forEach((opt) => { + acc[opt.value] = opt.createFilter + }) + return acc + }, {}, +) + +const ANALYSED_BY_FILTER_LOOKUP = Object.values(CATEGORY_FAMILY_FILTERS).reduce( + (acc, options) => { + options.forEach((opt) => { + acc[opt.value] = opt.analysedByFilter + }) + return acc + }, {}, +) + +const NO_ANALYSED_BY_FIELDS = Object.values(CATEGORY_FAMILY_FILTERS).reduce( + (acc, options) => { + options.filter(opt => opt.requireNoAnalysedBy).forEach((opt) => { + acc.add(opt.value) + }) + return acc + }, new Set(), +) + +const ANALYSED_BY_CATEGORY_OPTION_LOOKUP = CATEGORY_FAMILY_FILTERS[FAMILY_FIELD_ANALYSED_BY].reduce( + (acc, { value, category }) => ({ ...acc, [value]: category || 'Analysed By' }), {}, +) + +const isAnalysedBy = (family, analysedByFilter, user, analysedByOptions) => { + let requireNoAnalysedBy = false + const analsedByGroups = Object.values(analysedByFilter.reduce( + (acc, val) => { + const optFilter = analysedByOptions?.has(val) ? 
({ createdBy }) => createdBy === val : + ANALYSED_BY_FILTER_LOOKUP[val] + if (optFilter) { + const category = ANALYSED_BY_CATEGORY_OPTION_LOOKUP[val] + if (!acc[category]) { + acc[category] = [] + } + acc[category].push(optFilter) + } + if (NO_ANALYSED_BY_FIELDS.has(val)) { + requireNoAnalysedBy = true + } + return acc + }, {}, + )) + if (!analsedByGroups.length) { + return true + } + const filteredAnalysedBy = analsedByGroups.reduce( + (acc, filterGroup) => acc.filter(analysedBy => filterGroup.some(f => f(analysedBy, user))), + family.analysedBy, + ) + return requireNoAnalysedBy ? filteredAnalysedBy.length === 0 : filteredAnalysedBy.length > 0 +} + +export const familyPassesFilters = createSelector( + getIndividualsByGuid, + getUser, + getSamplesByFamily, + (individualsByGuid, user, samplesByFamily) => (family, groupedFilters, analysedByOptions) => { + if (groupedFilters.analysedBy && !isAnalysedBy(family, groupedFilters.analysedBy, user, analysedByOptions)) { + return false + } + return Object.values(groupedFilters).every((groupVals) => { + const filters = (groupVals || []).map(val => FAMILY_FILTER_LOOKUP[val]).filter(val => val) + return !filters.length || filters.some(filter => filter(family, individualsByGuid, user, samplesByFamily)) + }) + }, +) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index c9b2b34e2d..75d83a8bd7 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -299,6 +299,86 @@ export const FAMILY_DETAIL_FIELDS = [ { id: FAMILY_FIELD_PMIDS }, ] +const SHOW_DATA_LOADED = 'SHOW_DATA_LOADED' +const SHOW_ASSIGNED_TO_ME = 'SHOW_ASSIGNED_TO_ME' +const SHOW_ANALYSED_BY_ME = 'SHOW_ANALYSED_BY_ME' +const SHOW_ANALYSED = 'SHOW_ANALYSED' +const SHOW_NOT_ANALYSED = 'SHOW_NOT_ANALYSED' + +const hasMatchingSampleFilter = isMatchingSample => (family, individualsByGuid, user, samplesByFamily) => ( + (samplesByFamily[family.familyGuid] || []).some(sample => sample.isActive && isMatchingSample(sample))) + +const 
familyIsAssignedToMe = (family, user) => ( + family.assignedAnalyst ? family.assignedAnalyst.email === user.email : null) + +export const ASSIGNED_TO_ME_FILTER = { + value: SHOW_ASSIGNED_TO_ME, + name: 'Assigned To Me', + createFilter: (family, individualsByGuid, user) => familyIsAssignedToMe(family, user), +} + +export const CATEGORY_FAMILY_FILTERS = { + [FAMILY_FIELD_ANALYSIS_STATUS]: [ + ...SELECTABLE_FAMILY_ANALYSIS_STATUS_OPTIONS.map(option => ({ + ...option, + createFilter: family => family.analysisStatus === option.value, + })), + ], + [FAMILY_FIELD_ANALYSED_BY]: [ + ASSIGNED_TO_ME_FILTER, + { + value: SHOW_ANALYSED_BY_ME, + name: 'Analysed By Me', + analysedByFilter: ({ createdBy }, user) => createdBy === (user.displayName || user.email), + }, + { + value: SHOW_ANALYSED, + name: 'Analysed', + analysedByFilter: () => true, + }, + { + value: SHOW_NOT_ANALYSED, + name: 'Not Analysed', + requireNoAnalysedBy: true, + analysedByFilter: () => true, + }, + ...FAMILY_ANALYSED_BY_DATA_TYPES.map(([type, typeDisplay]) => ({ + value: type, + name: typeDisplay, + category: 'Data Type', + analysedByFilter: ({ dataType }) => dataType === type, + })), + { + value: 'yearSinceAnalysed', + name: '>1 Year', + category: 'Analysis Date', + requireNoAnalysedBy: true, + analysedByFilter: ({ lastModifiedDate }) => ( + (new Date()).setFullYear(new Date().getFullYear() - 1) < new Date(lastModifiedDate) + ), + }, + ], + [FAMILY_FIELD_FIRST_SAMPLE]: [ + { + value: SHOW_DATA_LOADED, + name: 'Data Loaded', + createFilter: hasMatchingSampleFilter(() => true), + }, + { + value: `${SHOW_DATA_LOADED}_RNA`, + name: 'Data Loaded - RNA', + createFilter: hasMatchingSampleFilter(({ sampleType }) => sampleType === SAMPLE_TYPE_RNA), + }, + ...[DATASET_TYPE_SV_CALLS, DATASET_TYPE_MITO_CALLS].map(dataType => ({ + value: `${SHOW_DATA_LOADED}_${dataType}`, + name: `Data Loaded -${DATASET_TITLE_LOOKUP[dataType]}`, + createFilter: hasMatchingSampleFilter( + ({ sampleType, datasetType }) => sampleType !== 
SAMPLE_TYPE_RNA && datasetType === dataType, + ), + })), + ], +} + // INDIVIDUAL FIELDS export const SEX_OPTIONS = [ From 9daeb335d9d576232bb780005c273c6c100d25f4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 17 Apr 2024 15:42:20 -0400 Subject: [PATCH 018/736] actually apply all possible filters --- ui/pages/Project/selectors.js | 2 +- ui/redux/selectors.js | 19 ++++++------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index 6445bbbb24..e83e5c5b1f 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -397,7 +397,7 @@ const familyPassesTableFilters = createSelector( )) { return false } - return passesFilterFunc(family, tableFilters, analysedByOptions) + return passesFilterFunc(family, tableFilters, analysedByOptions, PROJECT_CATEGORY_FAMILY_FILTERS) }, ) diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 68237965ab..7c5bfb94b5 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -454,15 +454,6 @@ export const getSpliceOutliersByChromFamily = createSelector( ), ) -const FAMILY_FILTER_LOOKUP = Object.values(CATEGORY_FAMILY_FILTERS).reduce( - (acc, options) => { - options.forEach((opt) => { - acc[opt.value] = opt.createFilter - }) - return acc - }, {}, -) - const ANALYSED_BY_FILTER_LOOKUP = Object.values(CATEGORY_FAMILY_FILTERS).reduce( (acc, options) => { options.forEach((opt) => { @@ -518,13 +509,15 @@ export const familyPassesFilters = createSelector( getIndividualsByGuid, getUser, getSamplesByFamily, - (individualsByGuid, user, samplesByFamily) => (family, groupedFilters, analysedByOptions) => { + (individualsByGuid, user, samplesByFamily) => ( + family, groupedFilters, analysedByOptions, categoryFilters = CATEGORY_FAMILY_FILTERS, + ) => { if (groupedFilters.analysedBy && !isAnalysedBy(family, groupedFilters.analysedBy, user, analysedByOptions)) { return false } - return Object.values(groupedFilters).every((groupVals) 
=> { - const filters = (groupVals || []).map(val => FAMILY_FILTER_LOOKUP[val]).filter(val => val) - return !filters.length || filters.some(filter => filter(family, individualsByGuid, user, samplesByFamily)) + return Object.entries(groupedFilters).every(([key, groupVals]) => { + const filters = categoryFilters[key]?.filter(opt => groupVals.includes(opt.value)).map(opt => opt.createFilter) + return !filters?.length || filters.some(filter => filter(family, individualsByGuid, user, samplesByFamily)) }) }, ) From 6ff80e7e3865b95e8dba46d991b59434bf7a363a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 17 Apr 2024 16:06:22 -0400 Subject: [PATCH 019/736] clean up --- ui/pages/Project/constants.js | 24 ++++++++++-------------- ui/pages/Project/selectors.js | 15 ++++++++------- ui/redux/selectors.js | 9 +++++---- ui/shared/utils/constants.js | 8 +++----- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/ui/pages/Project/constants.js b/ui/pages/Project/constants.js index c4ef5a6bd1..f63465019d 100644 --- a/ui/pages/Project/constants.js +++ b/ui/pages/Project/constants.js @@ -244,18 +244,18 @@ const SHOW_NO_PHENOTYPES_ENTERED = 'SHOW_NO_PHENOTYPES_ENTERED' const SHOW_ASSIGNED_TO_ME_IN_REVIEW = 'SHOW_ASSIGNED_TO_ME_IN_REVIEW' -const getFamilyCaseReviewStatuses = (family, individualsByGuid) => { - const statuses = family.individualGuids.map( - individualGuid => (individualsByGuid[individualGuid] || {}).caseReviewStatus, +const getFamilyCaseReviewStatuses = (family) => { + const statuses = family.individuals.map( + individual => (individual || {}).caseReviewStatus, ).filter(status => status) return statuses.length ? 
statuses : family.caseReviewStatuses } -const caseReviewStatusFilter = status => (family, individualsByGuid) => getFamilyCaseReviewStatuses( - family, individualsByGuid, +const caseReviewStatusFilter = status => family => getFamilyCaseReviewStatuses( + family, ).some(caseReviewStatus => caseReviewStatus === status) -const familyIsInReview = (family, individualsByGuid) => getFamilyCaseReviewStatuses(family, individualsByGuid).every( +const familyIsInReview = family => getFamilyCaseReviewStatuses(family).every( status => status === CASE_REVIEW_STATUS_IN_REVIEW, ) @@ -263,10 +263,8 @@ const REQUIRED_METADATA_FIELDS = INDIVIDUAL_DETAIL_FIELDS.filter( ({ isRequiredInternal }) => isRequiredInternal, ).map(({ field, subFields }) => (subFields ? subFields[0].field : field)) -const familyHasRequiredMetadata = (family, individualsByGuid) => { - const individuals = family.individualGuids.map( - individualGuid => individualsByGuid[individualGuid], - ).filter(individual => individual) +const familyHasRequiredMetadata = (family) => { + const individuals = family.individuals.filter(individual => individual) return individuals.length ? 
individuals.some(individual => REQUIRED_METADATA_FIELDS.every( field => individual[field] || individual[field] === false, ) && individual.features.length > 0) : family.hasRequiredMetadata @@ -302,7 +300,7 @@ export const PROJECT_CATEGORY_FAMILY_FILTERS = { { value: SHOW_NO_PHENOTYPES_ENTERED, name: 'Required Metadata Missing', - createFilter: (family, individualsByGuid) => !familyHasRequiredMetadata(family, individualsByGuid), + createFilter: family => !familyHasRequiredMetadata(family), }, ], [FAMILY_FIELD_SAVED_VARIANTS]: [MME_TAG_NAME, ANALYST_HIGH_PRIORITY_TAG].map(tagName => ({ @@ -316,9 +314,7 @@ export const CASE_REVIEW_FAMILY_FILTER_OPTIONS = [ { value: SHOW_ASSIGNED_TO_ME_IN_REVIEW, name: 'Assigned To Me - In Review', - createFilter: (family, individualsByGuid, user) => ASSIGNED_TO_ME_FILTER.createFilter( - family, individualsByGuid, user, - ) && familyIsInReview(family, individualsByGuid), + createFilter: (family, user) => ASSIGNED_TO_ME_FILTER.createFilter(family, user) && familyIsInReview(family), }, { ...ASSIGNED_TO_ME_FILTER, name: 'Assigned To Me - All' }, { ...IN_REVIEW_FAMILIES_FILTER, category: 'Case Review Status:' }, diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index e83e5c5b1f..b0b43754e9 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -379,16 +379,14 @@ const familyPassesTableFilters = createSelector( state => state.caseReviewTableState.familiesFilter, getFamiliesTableFilters, getFamilyAnalysers, - getIndividualsByGuid, getUser, getFamilyTagTypeCounts, familyPassesFilters, ( - isCaseReview, caseReviewFilter, familyTableFilters, analysedByOptions, individualsByGuid, user, - familyTagTypeCounts, passesFilterFunc, + isCaseReview, caseReviewFilter, familyTableFilters, analysedByOptions, user, familyTagTypeCounts, passesFilterFunc, ) => (family) => { if (isCaseReview) { - return CASE_REVIEW_FILTER_LOOKUP[caseReviewFilter](family, individualsByGuid, user) + return 
CASE_REVIEW_FILTER_LOOKUP[caseReviewFilter](family, user) } const { savedVariants, ...tableFilters } = familyTableFilters || {} @@ -403,16 +401,19 @@ const familyPassesTableFilters = createSelector( export const getVisibleFamilies = createSelector( getProjectAnalysisGroupFamiliesByGuid, + getIndividualsByGuid, getFamiliesBySearchString, getFamiliesSearch, familyPassesTableFilters, - (familiesByGuid, familiesBySearchString, familiesSearch, familyFilter) => { + (familiesByGuid, individualsByGuid, familiesBySearchString, familiesSearch, familyFilter) => { const searchedFamilies = familiesBySearchString ? Object.keys(familiesBySearchString).filter( familySearchString => familySearchString.includes(familiesSearch), ).map(familySearchString => familiesBySearchString[familySearchString]) : Object.values(familiesByGuid) return familyFilter ? - searchedFamilies.filter(familyFilter) : - searchedFamilies + searchedFamilies.filter(family => familyFilter({ + ...family, + individuals: family.individualGuids.map(individualGuid => (individualsByGuid[individualGuid])), + })) : searchedFamilies }, ) diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 7c5bfb94b5..ad9d0694e2 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -506,18 +506,19 @@ const isAnalysedBy = (family, analysedByFilter, user, analysedByOptions) => { } export const familyPassesFilters = createSelector( - getIndividualsByGuid, getUser, getSamplesByFamily, - (individualsByGuid, user, samplesByFamily) => ( + (user, samplesByFamily) => ( family, groupedFilters, analysedByOptions, categoryFilters = CATEGORY_FAMILY_FILTERS, ) => { if (groupedFilters.analysedBy && !isAnalysedBy(family, groupedFilters.analysedBy, user, analysedByOptions)) { return false } return Object.entries(groupedFilters).every(([key, groupVals]) => { - const filters = categoryFilters[key]?.filter(opt => groupVals.includes(opt.value)).map(opt => opt.createFilter) - return !filters?.length || filters.some(filter => 
filter(family, individualsByGuid, user, samplesByFamily)) + const filters = categoryFilters[key]?.filter( + opt => groupVals.includes(opt.value) && opt.createFilter, + ).map(opt => opt.createFilter) + return !filters?.length || filters.some(filter => filter(family, user, samplesByFamily)) }) }, ) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 75d83a8bd7..2a453c0f59 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -305,16 +305,14 @@ const SHOW_ANALYSED_BY_ME = 'SHOW_ANALYSED_BY_ME' const SHOW_ANALYSED = 'SHOW_ANALYSED' const SHOW_NOT_ANALYSED = 'SHOW_NOT_ANALYSED' -const hasMatchingSampleFilter = isMatchingSample => (family, individualsByGuid, user, samplesByFamily) => ( +const hasMatchingSampleFilter = isMatchingSample => (family, user, samplesByFamily) => ( (samplesByFamily[family.familyGuid] || []).some(sample => sample.isActive && isMatchingSample(sample))) -const familyIsAssignedToMe = (family, user) => ( - family.assignedAnalyst ? family.assignedAnalyst.email === user.email : null) - export const ASSIGNED_TO_ME_FILTER = { value: SHOW_ASSIGNED_TO_ME, name: 'Assigned To Me', - createFilter: (family, individualsByGuid, user) => familyIsAssignedToMe(family, user), + createFilter: (family, user) => ( + family.assignedAnalyst ? 
family.assignedAnalyst.email === user.email : null), } export const CATEGORY_FAMILY_FILTERS = { From 75be69864944373554e3d90fc52a83d9bf6b1283 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 18 Apr 2024 10:29:58 -0400 Subject: [PATCH 020/736] fix annotations --- .../SNV_INDEL/annotations.ht/.README.txt.crc | Bin 12 -> 12 bytes .../SNV_INDEL/annotations.ht/README.txt | 2 +- .../.index.crc | Bin 12 -> 0 bytes .../index | Bin 139 -> 0 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin .../index | Bin 0 -> 139 bytes .../metadata.json.gz | Bin .../annotations.ht/rows/.metadata.json.gz.crc | Bin 20 -> 20 bytes .../annotations.ht/rows/metadata.json.gz | Bin 1371 -> 1369 bytes ...0-1431d7f7-8f3e-4e29-878c-69a273b63db2.crc | Bin 20 -> 0 bytes ...0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.crc | Bin 0 -> 20 bytes ...art-0-1431d7f7-8f3e-4e29-878c-69a273b63db2 | Bin 1508 -> 0 bytes ...art-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b | Bin 0 -> 1502 bytes hail_search/test_search.py | 11 +-------- hail_search/test_utils.py | 22 ++---------------- 16 files changed, 4 insertions(+), 31 deletions(-) delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.index.crc rename hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/{part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx => part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx}/.metadata.json.gz.crc (100%) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/index rename hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/{part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx => 
part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx}/metadata.json.gz (100%) delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2 create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc index c130205bef2210481f02941663d7bd5a21b425af..6408cf00b610aae2242e225f06b01296377afec9 100644 GIT binary patch literal 12 TcmYc;N@ieSU}BK9V?G4{4}Jn; literal 12 TcmYc;N@ieSU}CVz-xdJ?5rG2q diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/README.txt index c2116e76b9..2b30b458a1 100644 --- a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/README.txt +++ b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
Written with version 0.2.128-eead8100a1c1 - Created at 2024/04/17 12:57:34 \ No newline at end of file + Created at 2024/04/18 10:25:58 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.index.crc deleted file mode 100644 index 2dd0312d7030509569218006d0090c18c5f6710d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(Mc5FTX61f9q diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/index deleted file mode 100644 index 1ddc2ce5c2a9114da078191961da002f489cea02..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 139 zcmYddU|>iEVvVi(e-%>KGBKF2Ffgz&uq0;`87}f?Vs>~Px2xKHadcw-Y z7{cg$mGSVg-9qlCnHxV|&}L$EXLP*7-?sfCkQu`0-XP4zkWgXJoTgyRKFRW&WSWE` T&;}8hEg}mU7#JCWJTyxH%|s&f diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..0577f8ecbc8a1d2dfcdee9db2e0b7faf671e97e1 GIT binary patch literal 12 TcmYc;N@ieSU}Ct@dr$xX6d40? 
literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..7203c8999a09bee25bc428e4cc0bd5e3e8258d0e GIT binary patch literal 139 zcmYddU|>iEVvVi(e-%>KGBKF2Ffgz&uq0;`87}f?Vs>~Px2xKHYdcw-Y z7{cg$h4JvQ-9qjsnHxV|&}L$EXLP*9-?sfCkQu`0{#S^NA)&&cIZeTseUjxl$utQ? 
Upba80TSOKzFfcL#d1#gZ0OlGaY5)KL literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc index 27a35cf2f3efd9e7fdd9adf789d4d98fe2042c3d..66c0bcabb7a829b28f367b0afe6724bb03153a4b 100644 GIT binary patch literal 20 ccmYc;N@ieSU}8A2Jm%55*FL!ijZzi@07y6pG5`Po literal 20 bcmYc;N@ieSU}DJM{e1h9kTnVcvdnJ)LZ=5K diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz index 8328e83fe8ab79a189ab8bf9f8d793060af18080..666bcdacb8cb38eac9a4f8268edd46b05f7825c3 100644 GIT binary patch literal 1369 zcmV-f1*ZBRiwFP!000000Nt2dPvbZgfd5OLcC=xZQYa;FZKq|>a;aK`R*O*N#!l-Y zak93H=%D=fJNmvXp!q$d}xlr_Tfl zlXUHY6lI9-+%yqX(uBdx1(W@w%q?Ae(-G%rcV9_9xP09|^*>{g3yQsJStMK%>`RWA z2slk8m**dQ!n4?S@d-T&6LBfeQoka=C_XPYA z!VDo!*061UnYA5d1~tdnP?&#TOuuP1gNQPS*pMl@=|V4J7|}pNuyZ6g32YP%pkIuJ zYt&OsW@qfh7E){qwN4_ul!%K;V4orqf^gNIT^k{`tC&jIC9Mz3=7+d(-qx+9%*87v z!qiB6F}<5!T19v7rpw!7d-7^4=Nw*$_B2xNZjn%3lPZxWi{-`a*3f`vm?zOL-4J%< zf4>_o?9~wJ5y$@3-`OW-l^iLE0Xr?_+ z{1N_9MGm9yMCzj?Hr8TV53e=FtZH`hoWw*}0chT?8*9~*?xKp-zghU_mp9|lWurTT zRo#{rOzj>jvfn?{`P2SE_f`LKYF7PfKG8NM~Hv@0w zYDKqlSinsiq}9MgTREwb?G>;U5q4s257i6}n`s?HE5pfw_70v|D6>hn2V#0qCBh7P zwcurQ>rHGKTiL>nj;l7T_OPl8RYr9+P?^1{VaiyjA=SaBEcmh(0PVo5&;RD&+kg67@G;K*ZuD3_3+Q~@X9Jur 
zp9(h`X}UmTFs1`B9dvmeE+$)=W4%7hTQzRdtaq{`N(&~{!CJAzMoG)vb=&Wza#s*! zwGBhUt()29blf`{j+-Ud2{tssn#dU7@d&{fLMt`7q(4DmaB|WgwAKC<66dNP)Pu|j zifzqI{evW^x{F#lTvT)U)s&M+op~_-_3K}`xS9n5g!u*ZUAIyl)FI_U0pPj#euqAU z%ddbz$M-+1c)#pMo4H9C3L@(`5MxPkvYAe^?4j$Ep-Dtb!@z+G|D)d*byTgGtZ06d z_5*@26kS8te6F2xf1I%H2Fm+YeWeiJ+U#~47Bj!NL)<&-cf^t!-L`J89KuF^q@aD zSsfgQz3^Z-An-Im$G9IH4iC>(`7ee)7Kp9M7(P0&(tmLPaa^jzir*LEp=e?78fW>k%0``mrvFXqLfD8iOwT~Z%EV}Et`eVt&hXW5qfh!2)? zZ-ue9=P*WLh(pY+2UzlAAA3^Ja$ZUcw}%A#(`o^vu$+RGKW7uf@mjFGCtfmhjymJT zRFJI$QiLLQu#=b*L1GFsmsIq7sa?2sW@E_Fyp+bR6(H8!|Rvd>IQCrRhBm}KCB_#lO zdaMffU_T*75Y^&(hWcg^4NmMWjU(i{Pzu>3gl@B1f;mEkwnsl*?!^o{vw0Y!Y)`-+ z##9qxcnjO+7FnB7rciU=*A$i)tJ!zOrWXPCm`A-61y07ig?g4 zTF2Gui6V2*He(Mt)|J}Ep}!W0@lxQJBH|n~*`IAY!S|bp2-qg25sU7Jxc#iJ8%wF} zZzvBEtv0LK-E3i0-M*WxZ%@q0tG!qa4T((UjMd~cpm-Aaq1G-@uhlgZ` zY0tg58?VgO0O}D%?$zImvR8D0)W&3Mbi_=~xXBKDE8C2pk*A4`=kD@mc{Tp0j($8> zo~Q2E|4~K`qYEt5Q5Vc>GznLz*MQUb zR;-qID~37UltEGsbhZ_f3ff*FTOMI6*N$LK(=egdg0v!>jA-uwn!z#?Wk)Ec230Uj z!&ie|Cb!~BYp;WLNM$9>ko zY4a&@qmZTzG#X=C5Ys}J*WseGr9RfHv%FE`I?ZY)3!*e&QXZ@&OH7nB++8>Q?n-w# zK{op!z|6RsEoPH$Z#1cwY|Gn`&{sqz0FOrqMibh|$u;>20)x}j{-CM$FBg8M`awO& z45P@@OyxgFg2>CLk;7#TiUUy^=O>%)G)*7cJ{jsnlr%IP$nZb~7gnHrL$Ll;c dNFG;nGwDRmq%s{O1e3nf^nW7C1yMs4001nup~nCK diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2.crc deleted file mode 100644 index 6ab56959aad8424f649fe998d4a25f9b3e147a84..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 ccmYc;N@ieSU}E?_fidRy(U(0XXTP@q07!TV5C8xG diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.crc new file mode 100644 index 
0000000000000000000000000000000000000000..ce34c02d08777a38c18364f947da110583ef40ca GIT binary patch literal 20 bcmYc;N@ieSU}9iWf78o2Q;JW`XKfh(FRukO literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2 b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-1431d7f7-8f3e-4e29-878c-69a273b63db2 deleted file mode 100644 index 50a0d0e31f8917f031170866e72ac1229bd1c6e0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1508 zcmVbv8J#h*tNP+bE4lz@C7rDnM&XiRht2(>9p`1Of8^ z5CMOC{Y3vsYE~gON&jis2smwm1}m_HLt1LIMP`;aHzhX@zO4P@#3oN5IPb_cs$A{s z$ZeXdy{BnP^z=YH996MT)G1W)Zq22 zximN=2M(H%2CKk;3Db4-OJ;kjg78S9&u;v)OuWprM1VN$A|RSfkXFc$4W?@IseHv3|(9h#L&fN<@I;{e~Q*)tJN`xa|2JLX+;WW zu>Aj*-E7S9`hQ(pLnqFeJHD?wqMdI(@}%dd$<6gl&7W+uaK^Q(-`A~y7S6Z~rVEv< zybin`yo=#1QTBxpWq6L^SKQ@%vK8*bqSr=xNa=ObhaYMhDBe9{npM1f`5GQ0Y0_{b z*SPZD%TzT-9sudws=3v3t49Q0J6O;2L^&B#adJJ43=DYC>1ADYO`%9#PkBd+4A>rY zLVJmpXzwad&9z)RKTlFus=T93>RF~~Ii#-JT>I|4qv49GdsWxHyIJ;HnU>&Q*YZqJ zwN%NyYSScBG$l>&ZLgnlJ$I*FtcN4Rs5rUP^hC`ITUu8MUe;ZA=cJAbB~SH4Q}e*y zJ571+6)}}_MHFdw>Z(Tc|2A%NzE%E{kA})@iNQ5aKou@&0a$*z5Pa?ACRf+2*XTh$ zeIj&OGTXzNP`auOQt{!lcf5E!JA>EZpEXuM58d84$13u^`l#uuUa81v{V zgTNeY*c?bzteLYU;_a8dzMaw{*}|kC9o(>NCiL)F&HnAdwnsaXfdvG0Wm{B*n-o@1UI28;Eq1c*{USjQWa;VqF&_RTrj z_m!(4=3-t6v0e|$!A{PN$+-#QF3yQuSXp8Sp9s%lGjIOISfbQV$1CBBVVoG}0{;-A z*m7Oqhb9w|Z#6(c`Cl)DkS;Dm1HfT>!{6B~#wX`2cXEs_i z9E{xF$(Pob-M0IpN(#(}F08Of;f}bGmub0AzK%RV4T|rRb2MGM0+5NogHI+$%dS9g zPNF5+&64T$lgr}vsz#msBv-RkO9g1(>AJ&?_Oy4C|74^nJ4SHeG#DjDPl_Y7*C7ie zfS#x!AyP;wBQzosG)W5SpaPMZqM8R1Amu;l|hg;|GDEXpEz{kiezTiwwOEBZaeAXOE6mFhG4jVP`lW{QeCAHYNPTNf#qBPR?b0 z+owUvQ9VDrStC=Xv>o*!y zGIkDdc3gg<7u2Y2vcEQCO*lZad+uI@zty;0=y|0fSzO0r66esm9O%MT5@g~SQWt>g zO2d6VB@2CMH3ub>M^GZaJCu*pg-dR9p<*i_8EjhYvyWa_f~kw6yw`_uI0c z1StL9H!e_?M5k&37pTZ%{V?(#>g)H(WShYLB=I+($Ip@HCOW_TW~vPU00000001bp KFa00@0RR9*+SwZb diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b new file mode 100644 index 0000000000000000000000000000000000000000..417105c39ba59d60adc1d3377417c93ca522a02e GIT binary patch literal 1502 zcmV<41tI#$1pokF3jhEpwJ-f(UkcqV0E$dd5kzpPsSZ;JNihIIs(JQnPt~eo7pK+{ z2xb;7u{H*;h%T5`k4cFQ?&Th3C7%NeuHS2SQyP(gJpol!fL1#=b%YqSO{N0@@c;|~ zYOmt;7yU1#T!-8&{i9?gASgF`2FY>7FEQn5S4FO* zdD=UQqD4{vV9LYgiA z|5!Ncg#Ev^wXriNtrcCj70=E$7x~h2v*e{}qGeB3x{$_ot6jIXeHYTWJca`qpm-fS zI2XfQq0AEz%J3Y+ZkT&{a@OrZqt{28i0SpxgAZyOD!xrpmRP`qd7B<2Yt(Mym#FgI zNi;1*9w5owX}ME#r$_}}KTu86Joy+~@liF84iNYc%3a%3MWseuM|nSu4k-Nxb$0S3 z&)!pxmMW=sZki*GOnE<_Qj35+%Q_s-%gYDVdI2 z)nz%NCpwnjE4_|-ckM=d+3q%yS@ChBsCkwcyt=d$xNEEK&PSZpIhv_?q9ua8H;VGt z9Z~hRBaC!grDZGne zn6zw#SUmV^o-iN3*3fzQU(uR+tY*~Az@C6r;xIM&vZ4u+@kUdChyD(W z6;0Zb5bK-7p0&a%>EXk+c4!h?x2}Kyb8t>)#*@ha>D( z2V?ja>vnEPHRIqo=DBcz9!@vIbF7oe;IdtofRL&O<8)>+>=mBLy!n-RVy}hpjddo< zcDtRt0vf{@Z9+^Ri?@dr`(a&tke=F zMc~`CG|@YhSc&3AgvZc-5gMdP`d_(_b+K=zL{ahoA)46Kf*_^gVAXU$g7r|ux*lt{ zF|Me@JP0F;ixsVS6FG^J+vKXq1K6baHaS00wJ-Rb3j7yU@{{Zf^ycG9p1mZUUPra- zZL8^&$<0wM$&_4h_Km6=>}N-NFL_T+n!b1%RHYAt6#o zDKjJ@qA*Dc>97J3nWCBp5+LP31EHWXh!}*-h>ValC8=2qjROG{jGD5-g03UXBE=8> zLa`V_aUp?Ap% ziC$2nw#ok5jFoVJynAlFsK3|vx#&5iAz56 Date: Thu, 18 Apr 2024 15:06:24 +0000 Subject: [PATCH 021/736] Bump aiohttp from 3.9.2 to 3.9.4 in /hail_search Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.9.2 to 3.9.4. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.2...v3.9.4) --- updated-dependencies: - dependency-name: aiohttp dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- hail_search/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/requirements-test.txt b/hail_search/requirements-test.txt index f2a7d5ca2b..508c8f0d05 100644 --- a/hail_search/requirements-test.txt +++ b/hail_search/requirements-test.txt @@ -4,7 +4,7 @@ # # pip-compile hail_search/requirements-test.in # -aiohttp==3.9.2 +aiohttp==3.9.4 # via pytest-aiohttp aiosignal==1.3.1 # via aiohttp From a9c932191aa1dd804802712011c9054c8cbf1033 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 18 Apr 2024 13:22:29 -0400 Subject: [PATCH 022/736] use shared categories --- ui/pages/Project/components/AnalysisGroupButtons.jsx | 9 +++------ ui/pages/Project/components/AnalysisGroups.jsx | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/ui/pages/Project/components/AnalysisGroupButtons.jsx b/ui/pages/Project/components/AnalysisGroupButtons.jsx index 099e490d5f..541320f636 100644 --- a/ui/pages/Project/components/AnalysisGroupButtons.jsx +++ b/ui/pages/Project/components/AnalysisGroupButtons.jsx @@ -14,13 +14,10 @@ import { FAMILY_DISPLAY_NAME, FAMILY_FIELD_PEDIGREE, FAMILY_FIELD_DESCRIPTION, - FAMILY_FIELD_ANALYSIS_STATUS, - FAMILY_FIELD_ANALYSED_BY, - FAMILY_FIELD_FIRST_SAMPLE, + CATEGORY_FAMILY_FILTERS, FAMILY_FIELD_NAME_LOOKUP, } from 'shared/utils/constants' -import { CATEGORY_FAMILY_FILTERS } from '../constants' import { updateAnalysisGroup } from '../reducers' import { getProjectFamiliesByGuid, getCurrentProject } from '../selectors' @@ -106,11 +103,11 @@ const FORM_FIELDS = [ const DYNAMIC_FORM_FIELDS = [ NAME_FIELD, - ...[FAMILY_FIELD_ANALYSIS_STATUS, FAMILY_FIELD_ANALYSED_BY, FAMILY_FIELD_FIRST_SAMPLE].map((category, i) => ({ + ...Object.entries(CATEGORY_FAMILY_FILTERS).map(([category, options], i) => ({ name: `criteria.${category}`, label: `Criteria: ${FAMILY_FIELD_NAME_LOOKUP[category]}`, + options, component: Multiselect, - options: CATEGORY_FAMILY_FILTERS[category], 
includeCategories: true, color: 'blue', validate: i === 0 ? (value, allValues) => (allValues.criteria ? undefined : 'At least one criteria is required') : null, diff --git a/ui/pages/Project/components/AnalysisGroups.jsx b/ui/pages/Project/components/AnalysisGroups.jsx index 17b0b9d721..e64fd1b3e2 100644 --- a/ui/pages/Project/components/AnalysisGroups.jsx +++ b/ui/pages/Project/components/AnalysisGroups.jsx @@ -8,9 +8,8 @@ import { getAnalysisGroupIsLoading } from 'redux/selectors' import OptionFieldView from 'shared/components/panel/view-fields/OptionFieldView' import DataLoader from 'shared/components/DataLoader' import { HelpIcon } from 'shared/components/StyledComponents' -import { FAMILY_FIELD_NAME_LOOKUP } from 'shared/utils/constants' +import { FAMILY_FIELD_NAME_LOOKUP, CATEGORY_FAMILY_FILTERS } from 'shared/utils/constants' import { compareObjects } from 'shared/utils/sortUtils' -import { CATEGORY_FAMILY_FILTERS } from '../constants' import { loadCurrentProjectAnalysisGroups } from '../reducers' import { getProjectAnalysisGroupsByGuid, getProjectGuid } from '../selectors' import { UpdateAnalysisGroupButton, DeleteAnalysisGroupButton } from './AnalysisGroupButtons' From c66790d451b39bfcf07acfe805dc5608e454d7bc Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 18 Apr 2024 14:27:18 -0400 Subject: [PATCH 023/736] apply dynamic group criteria to project page --- seqr/views/utils/orm_to_json_utils.py | 6 +++--- ui/pages/Project/selectors.js | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 1f6d1a14b8..86af230b2b 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -373,10 +373,9 @@ def get_json_for_analysis_groups(analysis_groups, project_guid=None, skip_nested Returns: array: array of json objects """ - # TODO familyGuids needed for dynamic groups? 
Either populate or remove def _process_result(result, group): result.update({ - 'familyGuids': [] if is_dynamic else [f.guid for f in group.families.all()], + 'familyGuids': [f.guid for f in group.families.all()], }) if not is_dynamic: @@ -388,7 +387,8 @@ def _process_result(result, group): additional_kwargs = {'additional_model_fields': ['project_id']} return _get_json_for_models( - analysis_groups, process_result=_process_result, guid_key='analysisGroupGuid', **additional_kwargs, **kwargs, + analysis_groups, process_result=None if is_dynamic else _process_result, guid_key='analysisGroupGuid', + **additional_kwargs, **kwargs, ) diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index 7856a80f6b..f1bf358de4 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -89,10 +89,19 @@ const getAnalysisGroupGuid = (state, props) => ( ) export const getCurrentAnalysisGroupFamilyGuids = createSelector( - getProjectAnalysisGroupsByGuid, getAnalysisGroupGuid, - (projectAnalysisGroupsByGuid, analysisGroupGuid) => analysisGroupGuid && - projectAnalysisGroupsByGuid[analysisGroupGuid]?.familyGuids, // TODO work with dynamic group + getProjectAnalysisGroupsByGuid, + getProjectFamiliesByGuid, + familyPassesFilters, + (analysisGroupGuid, analysisGroupsByGuid, projectFamiliesByGuid, passesFilterFunc) => { + const analysisGroup = analysisGroupsByGuid[analysisGroupGuid] + if (!analysisGroup) { + return null + } + return analysisGroup.familyGuids || Object.values(projectFamiliesByGuid).filter( + family => passesFilterFunc(family, analysisGroup.criteria), + ).map(family => family.familyGuid) + }, ) export const getProjectAnalysisGroupFamiliesByGuid = createSelector( From ad82586c54e374f2e3d38a1e67de3e9a849c2b37 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 18 Apr 2024 14:37:21 -0400 Subject: [PATCH 024/736] fix analysis group loading --- ui/pages/Project/components/AnalysisGroups.jsx | 5 +++-- 
ui/pages/Project/components/ProjectPageUI.jsx | 8 +++----- ui/pages/Project/selectors.js | 6 +++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/ui/pages/Project/components/AnalysisGroups.jsx b/ui/pages/Project/components/AnalysisGroups.jsx index e64fd1b3e2..c0c1ee7172 100644 --- a/ui/pages/Project/components/AnalysisGroups.jsx +++ b/ui/pages/Project/components/AnalysisGroups.jsx @@ -14,9 +14,9 @@ import { loadCurrentProjectAnalysisGroups } from '../reducers' import { getProjectAnalysisGroupsByGuid, getProjectGuid } from '../selectors' import { UpdateAnalysisGroupButton, DeleteAnalysisGroupButton } from './AnalysisGroupButtons' -const AnalysisGroups = React.memo(({ projectGuid, load, loading, analysisGroupsByGuid }) => ( +const AnalysisGroups = React.memo(({ projectGuid, load, loading, analysisGroupsByGuid, analysisGroupGuid }) => ( - {Object.values(analysisGroupsByGuid).sort(compareObjects('name')).map(ag => ( + {(analysisGroupsByGuid[analysisGroupGuid] ? [analysisGroupsByGuid[analysisGroupGuid]] : Object.values(analysisGroupsByGuid).sort(compareObjects('name'))).map(ag => (
{ag.criteria && } {ag.name} @@ -49,6 +49,7 @@ const AnalysisGroups = React.memo(({ projectGuid, load, loading, analysisGroupsB )) AnalysisGroups.propTypes = { + analysisGroupGuid: PropTypes.string, projectGuid: PropTypes.string, analysisGroupsByGuid: PropTypes.object.isRequired, loading: PropTypes.bool, diff --git a/ui/pages/Project/components/ProjectPageUI.jsx b/ui/pages/Project/components/ProjectPageUI.jsx index 593baa1557..a922ada2b4 100644 --- a/ui/pages/Project/components/ProjectPageUI.jsx +++ b/ui/pages/Project/components/ProjectPageUI.jsx @@ -83,11 +83,9 @@ const ProjectPageUI = React.memo(({ analysisGroupGuid, load, loading, familiesLo - {analysisGroupGuid ? null : ( - }> - - - )} + }> + + } collaboratorEdit> diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index f1bf358de4..243c34a7b6 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -98,9 +98,9 @@ export const getCurrentAnalysisGroupFamilyGuids = createSelector( if (!analysisGroup) { return null } - return analysisGroup.familyGuids || Object.values(projectFamiliesByGuid).filter( + return analysisGroup.criteria ? Object.values(projectFamiliesByGuid).filter( family => passesFilterFunc(family, analysisGroup.criteria), - ).map(family => family.familyGuid) + ).map(family => family.familyGuid) : analysisGroup.familyGuids }, ) @@ -426,7 +426,7 @@ export const getVisibleFamilies = createSelector( return familyFilter ? 
searchedFamilies.filter(family => familyFilter({ ...family, - individuals: family.individualGuids.map(individualGuid => (individualsByGuid[individualGuid])), + individuals: family?.individualGuids.map(individualGuid => (individualsByGuid[individualGuid])), })) : searchedFamilies }, ) From 359037d3b4efdcbc6b44bd127a0c3252b74f3751 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 18 Apr 2024 16:20:46 -0400 Subject: [PATCH 025/736] shared helper current group guids --- ui/pages/Project/components/SavedVariants.jsx | 2 +- ui/pages/Project/selectors.js | 22 +------------------ ui/redux/selectors.js | 22 +++++++++++++++++++ .../components/panel/variants/selectors.js | 10 ++++----- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/ui/pages/Project/components/SavedVariants.jsx b/ui/pages/Project/components/SavedVariants.jsx index 416f462164..37748641a7 100644 --- a/ui/pages/Project/components/SavedVariants.jsx +++ b/ui/pages/Project/components/SavedVariants.jsx @@ -6,6 +6,7 @@ import { Grid } from 'semantic-ui-react' import styled from 'styled-components' import { updateVariantTags } from 'redux/rootReducer' +import { getCurrentAnalysisGroupFamilyGuids } from 'redux/selectors' import { VARIANT_SORT_FIELD, VARIANT_HIDE_EXCLUDED_FIELD, @@ -26,7 +27,6 @@ import { loadSavedVariants, updateSavedVariantTable } from '../reducers' import { getCurrentProject, getProjectTagTypeOptions, getTaggedVariantsByFamily, getProjectVariantSavedByOptions, getSavedVariantTagTypeCounts, getSavedVariantTagTypeCountsByFamily, getSavedVariantTableState, - getCurrentAnalysisGroupFamilyGuids, } from '../selectors' import VariantTagTypeBar, { getSavedVariantsLinkPath } from './VariantTagTypeBar' import SelectSavedVariantsTable, { TAG_COLUMN, VARIANT_POS_COLUMN, GENES_COLUMN } from './SelectSavedVariantsTable' diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index 243c34a7b6..0d4c34d973 100644 --- a/ui/pages/Project/selectors.js +++ 
b/ui/pages/Project/selectors.js @@ -22,7 +22,7 @@ import { getMmeResultsByGuid, getMmeSubmissionsByGuid, getHasActiveSearchableSampleByFamily, getSelectableTagTypesByProject, getVariantTagsByGuid, getUserOptionsByUsername, getSamplesByFamily, getNotesByFamilyType, getVariantTagNotesByFamilyVariants, getPhenotypeGeneScoresByIndividual, - getRnaSeqDataByIndividual, familyPassesFilters, + getRnaSeqDataByIndividual, familyPassesFilters, getAnalysisGroupGuid, getCurrentAnalysisGroupFamilyGuids, } from 'redux/selectors' import { @@ -84,26 +84,6 @@ export const getProjectAnalysisGroupsByGuid = createSelector( }), ) -const getAnalysisGroupGuid = (state, props) => ( - (props || {}).match ? props.match.params.analysisGroupGuid : (props || {}).analysisGroupGuid -) - -export const getCurrentAnalysisGroupFamilyGuids = createSelector( - getAnalysisGroupGuid, - getProjectAnalysisGroupsByGuid, - getProjectFamiliesByGuid, - familyPassesFilters, - (analysisGroupGuid, analysisGroupsByGuid, projectFamiliesByGuid, passesFilterFunc) => { - const analysisGroup = analysisGroupsByGuid[analysisGroupGuid] - if (!analysisGroup) { - return null - } - return analysisGroup.criteria ? Object.values(projectFamiliesByGuid).filter( - family => passesFilterFunc(family, analysisGroup.criteria), - ).map(family => family.familyGuid) : analysisGroup.familyGuids - }, -) - export const getProjectAnalysisGroupFamiliesByGuid = createSelector( getProjectFamiliesByGuid, getCurrentAnalysisGroupFamilyGuids, diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 8e1bdce4d5..ac21d17d01 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -522,3 +522,25 @@ export const familyPassesFilters = createSelector( }) }, ) + +export const getAnalysisGroupGuid = (state, props) => ( + (props || {}).match ? 
props.match.params.analysisGroupGuid : (props || {}).analysisGroupGuid +) + +export const getCurrentAnalysisGroupFamilyGuids = createSelector( + getAnalysisGroupGuid, + getAnalysisGroupsByGuid, + getFamiliesGroupedByProjectGuid, + familyPassesFilters, + state => state.currentProjectGuid, + (analysisGroupGuid, analysisGroupsByGuid, familiesByProjectGuid, passesFilterFunc, projectGuid) => { + const analysisGroup = analysisGroupGuid && analysisGroupsByGuid[analysisGroupGuid] + if (!analysisGroup) { + return null + } + return analysisGroup.criteria ? Object.values( + familiesByProjectGuid[analysisGroup.projectGuid || projectGuid] || {}, + ).filter(family => passesFilterFunc(family, analysisGroup.criteria)).map(family => family.familyGuid) : + analysisGroup.familyGuids + }, +) diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index 1ab981dd4e..a58de15314 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -16,7 +16,7 @@ import { import { getVariantTagsByGuid, getVariantNotesByGuid, getSavedVariantsByGuid, getAnalysisGroupsByGuid, getGenesById, getUser, getFamiliesByGuid, getProjectsByGuid, getIndividualsByGuid, getRnaSeqDataByIndividual, - getPhenotypeGeneScoresByIndividual, + getPhenotypeGeneScoresByIndividual, getCurrentAnalysisGroupFamilyGuids, } from 'redux/selectors' export const getIndividualGeneDataByFamilyGene = createSelector( @@ -71,10 +71,10 @@ const sortCompHet = (a, b) => (a.populations ? 1 : 0) - (b.populations ? 
1 : 0) const getProjectSavedVariantsSelection = createSelector( (state, props) => props.match.params, getFamiliesByGuid, - getAnalysisGroupsByGuid, + getCurrentAnalysisGroupFamilyGuids, state => state.currentProjectGuid, getVariantTagsByGuid, - ({ tag, familyGuid, analysisGroupGuid, variantGuid }, familiesByGuid, analysisGroupsByGuid, + ({ tag, familyGuid, analysisGroupGuid, variantGuid }, familiesByGuid, analysisGroupFamilyGuids, projectGuid, tagsByGuid) => { if (!projectGuid) { return null @@ -83,9 +83,7 @@ const getProjectSavedVariantsSelection = createSelector( let variantFilter if (variantGuid) { variantFilter = o => variantGuid.split(',').includes(o.variantGuid) - } else if (analysisGroupGuid && analysisGroupsByGuid[analysisGroupGuid]) { - // TODO work with dynamic groups - const analysisGroupFamilyGuids = analysisGroupsByGuid[analysisGroupGuid].familyGuids + } else if (analysisGroupFamilyGuids) { variantFilter = o => o.familyGuids.some(fg => analysisGroupFamilyGuids.includes(fg)) } else if (familyGuid) { variantFilter = o => o.familyGuids.includes(familyGuid) From a0678bd256f57e3c51d506b9aac0b3ddb780fae1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 18 Apr 2024 16:52:54 -0400 Subject: [PATCH 026/736] clean up --- seqr/views/apis/analysis_group_api.py | 2 +- seqr/views/utils/orm_to_json_utils.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index 2d797eff17..d71a3f60ee 100644 --- a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -94,4 +94,4 @@ def delete_dynamic_analysis_group_handler(request, project_guid, analysis_group_ return delete_analysis_group_handler( request, project_guid, analysis_group_guid, model_cls=DynamicAnalysisGroup, validate_can_delete=lambda analysis_group: None if analysis_group.project_id else 'Cannot delete shared analysis group', - ) \ No newline at end of file + ) diff --git 
a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 86af230b2b..0c9f5df448 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -373,9 +373,10 @@ def get_json_for_analysis_groups(analysis_groups, project_guid=None, skip_nested Returns: array: array of json objects """ + def _process_result(result, group): result.update({ - 'familyGuids': [f.guid for f in group.families.all()], + 'familyGuids': [f.guid for f in group.families.all()] }) if not is_dynamic: From 78623ffcbc33fb22fa25df51b4013f5aac3d06a4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 10:49:34 -0400 Subject: [PATCH 027/736] include project in analysis group search path --- ui/pages/Project/selectors.js | 2 +- ui/pages/Search/VariantSearch.jsx | 2 +- ui/pages/Search/components/PageHeader.jsx | 11 ++++++----- ui/redux/selectors.js | 2 +- ui/shared/components/page/PageHeader.jsx | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index 0d4c34d973..666756d7ee 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -775,7 +775,7 @@ export const getPageHeaderEntityLinks = createSelector( familyGuid => !(hasActiveSearchableSampleByFamilyGuid[familyGuid] || {}).isSearchable, ) const entityLinks = [{ - to: `/variant_search/${searchType}/${searchId}`, + to: `/variant_search/${searchType === 'analysis_group' ? `project/${project.projectGuid}/` : ''}${searchType}/${searchId}`, content: `${snakecaseToTitlecase(searchType)} Variant Search`, disabled, popup: disabled ? 
diff --git a/ui/pages/Search/VariantSearch.jsx b/ui/pages/Search/VariantSearch.jsx index a4d509d37d..843f9f79db 100644 --- a/ui/pages/Search/VariantSearch.jsx +++ b/ui/pages/Search/VariantSearch.jsx @@ -11,8 +11,8 @@ const RESULTS_PATH = 'results/:searchHash' const SINGLE_VARIANT_RESULTS_PATH = 'variant/:variantId/family/:familyGuid' const SEARCH_FORM_PAGES = [ + 'project/:projectGuid/analysis_group/:analysisGroupGuid', 'project/:projectGuid', - 'analysis_group/:analysisGroupGuid', 'family/:familyGuid', RESULTS_PATH, ] diff --git a/ui/pages/Search/components/PageHeader.jsx b/ui/pages/Search/components/PageHeader.jsx index e21e2c772b..205150aede 100644 --- a/ui/pages/Search/components/PageHeader.jsx +++ b/ui/pages/Search/components/PageHeader.jsx @@ -62,14 +62,15 @@ const PAGE_CONFIGS = { } const getPageHeaderProps = ({ projectsByGuid, familiesByGuid, analysisGroupsByGuid, searchesByHash, match }) => { - const { pageType, entityGuid } = match.params + const { pageType, entityGuid, subPageType, subEntityGuid } = match.params const breadcrumbIdSections = [] - const { entity, entityUrlPath, actualPageType, description } = - PAGE_CONFIGS[pageType](entityGuid, projectsByGuid, familiesByGuid, analysisGroupsByGuid, searchesByHash) + const { entity, entityUrlPath, actualPageType, description } = PAGE_CONFIGS[subPageType || pageType]( + subEntityGuid || entityGuid, projectsByGuid, familiesByGuid, analysisGroupsByGuid, searchesByHash, + ) if (entity) { - const project = projectsByGuid[entity.projectGuid] - breadcrumbIdSections.push({ content: snakecaseToTitlecase(actualPageType || pageType) }) + const project = projectsByGuid[entity.projectGuid || entityGuid] + breadcrumbIdSections.push({ content: snakecaseToTitlecase(actualPageType || subPageType || pageType) }) breadcrumbIdSections.push({ content: entity.displayName || entity.name, link: project && `/project/${project.projectGuid}/${entityUrlPath}`, diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 
ac21d17d01..96344b760a 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -532,7 +532,7 @@ export const getCurrentAnalysisGroupFamilyGuids = createSelector( getAnalysisGroupsByGuid, getFamiliesGroupedByProjectGuid, familyPassesFilters, - state => state.currentProjectGuid, + (state, props) => state.currentProjectGuid || props.match?.params?.projectGuid, (analysisGroupGuid, analysisGroupsByGuid, familiesByProjectGuid, passesFilterFunc, projectGuid) => { const analysisGroup = analysisGroupGuid && analysisGroupsByGuid[analysisGroupGuid] if (!analysisGroup) { diff --git a/ui/shared/components/page/PageHeader.jsx b/ui/shared/components/page/PageHeader.jsx index 162c1f34dc..eddda2984d 100644 --- a/ui/shared/components/page/PageHeader.jsx +++ b/ui/shared/components/page/PageHeader.jsx @@ -80,7 +80,7 @@ export default () => ( - + ) From 005f29f473c5856cf1503cdd1c03c40b7f1a5c21 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 11:01:44 -0400 Subject: [PATCH 028/736] better search project families selector --- .../filters/ProjectFamiliesFilter.jsx | 7 +- ui/pages/Search/selectors.js | 69 ++++++++++--------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx index 1d9f36bc0c..852b40b270 100644 --- a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx +++ b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx @@ -5,10 +5,7 @@ import { Form, Button } from 'semantic-ui-react' import { getProjectsByGuid, - getFamiliesGroupedByProjectGuid, getAnalysisGroupsGroupedByProjectGuid, - getFamiliesByGuid, - getAnalysisGroupsByGuid, getProjectDatasetTypes, } from 'redux/selectors' import { Multiselect, ButtonRadioGroup } from 'shared/components/form/Inputs' @@ -147,9 +144,7 @@ const mapStateToProps = (state, ownProps) => ({ const mapDispatchToProps = (dispatch, ownProps) => { const onLoadSuccess = (state) => { 
- const newVal = getProjectFamilies( - ownProps.value, getFamiliesByGuid(state), getFamiliesGroupedByProjectGuid(state), getAnalysisGroupsByGuid(state), - ) + const newVal = getProjectFamilies(state)(ownProps.value) if (newVal && newVal !== ownProps.value) { ownProps.onChange(newVal) } diff --git a/ui/pages/Search/selectors.js b/ui/pages/Search/selectors.js index b4f383c50f..6ac32faec1 100644 --- a/ui/pages/Search/selectors.js +++ b/ui/pages/Search/selectors.js @@ -26,37 +26,44 @@ export const getInhertanceFilterMode = createSelector( searchParams => (((searchParams || {}).search || {}).inheritance || {}).mode, ) -export const getProjectFamilies = (params, familiesByGuid, familiesByProjectGuid, analysisGroupByGuid) => { - if (params.projectGuid && params.familyGuids) { - return params - } - - if (params.projectGuid) { - const loadedProjectFamilies = familiesByProjectGuid[params.projectGuid] - return { - projectGuid: params.projectGuid, - familyGuids: loadedProjectFamilies ? Object.keys(loadedProjectFamilies) : null, +export const getProjectFamilies = createSelector( + getFamiliesByGuid, + getFamiliesGroupedByProjectGuid, + getAnalysisGroupsByGuid, + (familiesByGuid, familiesByProjectGuid, analysisGroupByGuid) => ( + { projectGuid, familyGuids, familyGuid, analysisGroupGuid, searchHash, ...params }, + ) => { + if (projectGuid && familyGuids) { + return { projectGuid, familyGuids } + } + + if (projectGuid) { + const loadedProjectFamilies = familiesByProjectGuid[projectGuid] + return { + projectGuid, + familyGuids: loadedProjectFamilies ? Object.keys(loadedProjectFamilies) : null, + } + } + if (analysisGroupGuid) { + const analysisGroup = analysisGroupByGuid[analysisGroupGuid] + return analysisGroup ? { + projectGuid: analysisGroup.projectGuid, + familyGuids: analysisGroup.familyGuids, + } : { analysisGroupGuid } } - } - if (params.analysisGroupGuid) { - const analysisGroup = analysisGroupByGuid[params.analysisGroupGuid] - return analysisGroup ? 
{ - projectGuid: analysisGroup.projectGuid, - familyGuids: analysisGroup.familyGuids, - } : { analysisGroupGuid: params.analysisGroupGuid } - } - if (params.familyGuid || params.familyGuids) { - const familyGuid = params.familyGuid || params.familyGuids[0] - return { - projectGuid: (familiesByGuid[familyGuid] || {}).projectGuid, - familyGuids: [familyGuid], + if (familyGuid || familyGuids) { + const singleFamilyGuid = familyGuid || familyGuids[0] + return { + projectGuid: (familiesByGuid[singleFamilyGuid] || {}).projectGuid, + familyGuids: [singleFamilyGuid], + } } - } - if (params.searchHash) { - return params - } - return null -} + if (searchHash) { + return { projectGuid, familyGuids, familyGuid, analysisGroupGuid, searchHash, ...params } + } + return null + }, +) export const getMultiProjectFamilies = createSelector( (state, props) => props.match.params, @@ -74,10 +81,8 @@ const createProjectFamiliesSelector = createSelectorCreator( const getIntitialProjectFamilies = createProjectFamiliesSelector( (state, props) => props.match.params, - getFamiliesByGuid, - getFamiliesGroupedByProjectGuid, - getAnalysisGroupsByGuid, getProjectFamilies, + (params, getProjectFamiliesFunc) => getProjectFamiliesFunc(params), ) export const getIntitialSearch = createSelector( From d39ae20da20369dee8dc9cd442e5594e6f96f6cc Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 11:14:43 -0400 Subject: [PATCH 029/736] initally select dynamic analysis group --- ui/pages/Search/constants.js | 2 +- ui/pages/Search/selectors.js | 20 +++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/ui/pages/Search/constants.js b/ui/pages/Search/constants.js index a17a951594..59506392dc 100644 --- a/ui/pages/Search/constants.js +++ b/ui/pages/Search/constants.js @@ -6,7 +6,7 @@ import { // TODO work with dynamic groups? 
export const getSelectedAnalysisGroups = (analysisGroupsByGuid, familyGuids) => Object.values( analysisGroupsByGuid, -).filter(group => group.familyGuids.every(familyGuid => familyGuids.includes(familyGuid))) +).filter(group => group.familyGuids?.every(familyGuid => familyGuids.includes(familyGuid))) const REF_REF = 'ref_ref' const HAS_REF = 'has_ref' diff --git a/ui/pages/Search/selectors.js b/ui/pages/Search/selectors.js index 40c231e190..2360776368 100644 --- a/ui/pages/Search/selectors.js +++ b/ui/pages/Search/selectors.js @@ -4,7 +4,7 @@ import { getProjectsByGuid, getFamiliesByGuid, getFamiliesGroupedByProjectGuid, - getAnalysisGroupsByGuid, + getCurrentAnalysisGroupFamilyGuids, getLocusListsByGuid, getAnalysisGroupsGroupedByProjectGuid, getCurrentSearchParams, @@ -29,14 +29,20 @@ export const getInhertanceFilterMode = createSelector( export const getProjectFamilies = createSelector( getFamiliesByGuid, getFamiliesGroupedByProjectGuid, - getAnalysisGroupsByGuid, - (familiesByGuid, familiesByProjectGuid, analysisGroupByGuid) => ( + getCurrentAnalysisGroupFamilyGuids, + (familiesByGuid, familiesByProjectGuid, analysisGroupFamilyGuids) => ( { projectGuid, familyGuids, familyGuid, analysisGroupGuid, searchHash, ...params }, ) => { if (projectGuid && familyGuids) { return { projectGuid, familyGuids } } + if (analysisGroupGuid) { + return analysisGroupFamilyGuids ? { + projectGuid, + familyGuids: analysisGroupFamilyGuids, + } : { projectGuid, analysisGroupGuid } + } if (projectGuid) { const loadedProjectFamilies = familiesByProjectGuid[projectGuid] return { @@ -44,14 +50,6 @@ export const getProjectFamilies = createSelector( familyGuids: loadedProjectFamilies ? Object.keys(loadedProjectFamilies) : null, } } - if (analysisGroupGuid) { - const analysisGroup = analysisGroupByGuid[analysisGroupGuid] - // TODO work with dynamic groups - return analysisGroup ? 
{ - projectGuid: analysisGroup.projectGuid, - familyGuids: analysisGroup.familyGuids, - } : { analysisGroupGuid } - } if (familyGuid || familyGuids) { const singleFamilyGuid = familyGuid || familyGuids[0] return { From d8970a052a9bc5a6d80fd9a55a5c1de67cdcb06e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 12:03:16 -0400 Subject: [PATCH 030/736] fix search page dynamic analysis group selection --- .../filters/ProjectFamiliesFilter.jsx | 28 +++++++------ ui/pages/Search/constants.js | 1 - ui/pages/Search/selectors.js | 2 +- ui/redux/selectors.js | 39 ++++++++++++------- 4 files changed, 43 insertions(+), 27 deletions(-) diff --git a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx index 0ac8f97b46..290873ae56 100644 --- a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx +++ b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx @@ -5,13 +5,12 @@ import { Form, Button } from 'semantic-ui-react' import { getProjectsByGuid, - getAnalysisGroupsGroupedByProjectGuid, + getProjectAnalysisGroupFamilyGuidsByGuid, getProjectDatasetTypes, } from 'redux/selectors' import { Multiselect, ButtonRadioGroup } from 'shared/components/form/Inputs' import { ProjectFilter } from 'shared/components/panel/search/ProjectsField' import { SOLVED_FAMILY_STATUS_OPTIONS } from 'shared/utils/constants' -import { getSelectedAnalysisGroups } from '../../constants' import { getProjectFamilies, getSearchContextIsLoading, getFamilyOptions, getAnalysisGroupOptions } from '../../selectors' import { loadProjectFamiliesContext } from '../../reducers' @@ -30,7 +29,7 @@ class ProjectFamiliesFilterInput extends React.PureComponent { static propTypes = { familyOptions: PropTypes.arrayOf(PropTypes.object), analysisGroupOptions: PropTypes.arrayOf(PropTypes.object), - projectAnalysisGroupsByGuid: PropTypes.object, + projectAnalysisGroupFamilyGuidsByGuid: PropTypes.arrayOf(PropTypes.string), value: 
PropTypes.object, onChange: PropTypes.func, } @@ -54,11 +53,14 @@ class ProjectFamiliesFilterInput extends React.PureComponent { } selectedAnalysisGroups = () => { - // TODO correctly show selected dynamic groups, depending on how familyGuids is populated - const { projectAnalysisGroupsByGuid, value } = this.props + const { projectAnalysisGroupFamilyGuidsByGuid, value } = this.props - return this.multiFamiliesSelected() ? [] : - getSelectedAnalysisGroups(projectAnalysisGroupsByGuid, value.familyGuids).map(group => group.analysisGroupGuid) + return this.multiFamiliesSelected() ? [] : Object.entries(projectAnalysisGroupFamilyGuidsByGuid).reduce( + (acc, [analysisGroupGuid, groupFamilyGuids]) => ( + groupFamilyGuids.every(familyGuid => value.familyGuids.includes(familyGuid)) ? [...acc, analysisGroupGuid] : acc + ), + [], + ) } onFamiliesChange = (familyGuids) => { @@ -67,21 +69,21 @@ class ProjectFamiliesFilterInput extends React.PureComponent { } selectAnalysisGroup = (analysisGroups) => { - const { projectAnalysisGroupsByGuid, value } = this.props + const { projectAnalysisGroupFamilyGuidsByGuid, value } = this.props const selectedAnalysisGroups = this.selectedAnalysisGroups() if (analysisGroups.length > selectedAnalysisGroups.length) { const newGroupGuid = analysisGroups.find(analysisGroupGuid => !selectedAnalysisGroups.includes(analysisGroupGuid)) this.onFamiliesChange( - [...new Set([...value.familyGuids, ...projectAnalysisGroupsByGuid[newGroupGuid].familyGuids])], + [...new Set([...value.familyGuids, ...projectAnalysisGroupFamilyGuidsByGuid[newGroupGuid]])], ) } else if (analysisGroups.length < selectedAnalysisGroups.length) { const removedGroupGuid = selectedAnalysisGroups.find( analysisGroupGuid => !analysisGroups.includes(analysisGroupGuid), ) this.onFamiliesChange(value.familyGuids.filter( - familyGuid => !projectAnalysisGroupsByGuid[removedGroupGuid].familyGuids.includes(familyGuid), + familyGuid => 
!projectAnalysisGroupFamilyGuidsByGuid[removedGroupGuid].includes(familyGuid), )) } } @@ -97,7 +99,9 @@ class ProjectFamiliesFilterInput extends React.PureComponent { } render() { - const { familyOptions, analysisGroupOptions, projectAnalysisGroupsByGuid, value, onChange, ...props } = this.props + const { + familyOptions, analysisGroupOptions, projectAnalysisGroupFamilyGuidsByGuid, value, onChange, ...props + } = this.props const multiFamiliesSelected = this.multiFamiliesSelected() const selectedFamilies = multiFamiliesSelected ? [] : value.familyGuids @@ -136,7 +140,7 @@ class ProjectFamiliesFilterInput extends React.PureComponent { const mapStateToProps = (state, ownProps) => ({ familyOptions: getFamilyOptions(state, ownProps), analysisGroupOptions: getAnalysisGroupOptions(state, ownProps), - projectAnalysisGroupsByGuid: getAnalysisGroupsGroupedByProjectGuid(state)[ownProps.value.projectGuid] || {}, + projectAnalysisGroupFamilyGuidsByGuid: getProjectAnalysisGroupFamilyGuidsByGuid(state, ownProps), project: getProjectsByGuid(state)[ownProps.value.projectGuid], projectHasSamples: (getProjectDatasetTypes(state)[ownProps.value.projectGuid] || []).length > 0, loading: getSearchContextIsLoading(state), diff --git a/ui/pages/Search/constants.js b/ui/pages/Search/constants.js index 59506392dc..b6134dd203 100644 --- a/ui/pages/Search/constants.js +++ b/ui/pages/Search/constants.js @@ -3,7 +3,6 @@ import { DE_NOVO_FILTER, ANY_AFFECTED, INHERITANCE_FILTER_OPTIONS, } from 'shared/utils/constants' -// TODO work with dynamic groups? 
export const getSelectedAnalysisGroups = (analysisGroupsByGuid, familyGuids) => Object.values( analysisGroupsByGuid, ).filter(group => group.familyGuids?.every(familyGuid => familyGuids.includes(familyGuid))) diff --git a/ui/pages/Search/selectors.js b/ui/pages/Search/selectors.js index 2360776368..04b5f042d2 100644 --- a/ui/pages/Search/selectors.js +++ b/ui/pages/Search/selectors.js @@ -188,5 +188,5 @@ export const getAnalysisGroupOptions = createSelector( (analysisGroupsGroupedByProjectGuid, projectGuid) => Object.values({ ...(analysisGroupsGroupedByProjectGuid[projectGuid] || {}), ...(analysisGroupsGroupedByProjectGuid.null || {}), - }).map(group => ({ value: group.analysisGroupGuid, text: group.name })), + }).map(group => ({ value: group.analysisGroupGuid, text: group.name, icon: group.criteria ? 'sync' : null })), ) diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 96344b760a..734ead8c8b 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -523,24 +523,37 @@ export const familyPassesFilters = createSelector( }, ) +export const getProjectAnalysisGroupFamilyGuidsByGuid = createSelector( + getAnalysisGroupsGroupedByProjectGuid, + getFamiliesGroupedByProjectGuid, + familyPassesFilters, + (state, props) => ( + state.currentProjectGuid || + props.value?.projectGuid || + props.match?.params?.projectGuid || + props.match?.params?.entityGuid + ), + (projectAnalysisGroupsByGuid, familiesByProjectGuid, passesFilterFunc, projectGuid) => ( + [ + ...Object.values(projectAnalysisGroupsByGuid[projectGuid] || {}), + ...Object.values(projectAnalysisGroupsByGuid.null || {}), + ].reduce((acc, analysisGroup) => ({ + ...acc, + [analysisGroup.analysisGroupGuid]: analysisGroup.criteria ? 
+ Object.values(familiesByProjectGuid[projectGuid] || {}).filter( + family => passesFilterFunc(family, analysisGroup.criteria), + ).map(family => family.familyGuid) : analysisGroup.familyGuids, + }), {}) + ), +) + export const getAnalysisGroupGuid = (state, props) => ( (props || {}).match ? props.match.params.analysisGroupGuid : (props || {}).analysisGroupGuid ) export const getCurrentAnalysisGroupFamilyGuids = createSelector( getAnalysisGroupGuid, - getAnalysisGroupsByGuid, - getFamiliesGroupedByProjectGuid, - familyPassesFilters, + getProjectAnalysisGroupFamilyGuidsByGuid, (state, props) => state.currentProjectGuid || props.match?.params?.projectGuid, - (analysisGroupGuid, analysisGroupsByGuid, familiesByProjectGuid, passesFilterFunc, projectGuid) => { - const analysisGroup = analysisGroupGuid && analysisGroupsByGuid[analysisGroupGuid] - if (!analysisGroup) { - return null - } - return analysisGroup.criteria ? Object.values( - familiesByProjectGuid[analysisGroup.projectGuid || projectGuid] || {}, - ).filter(family => passesFilterFunc(family, analysisGroup.criteria)).map(family => family.familyGuid) : - analysisGroup.familyGuids - }, + (analysisGroupGuid, analysisGroupFamilyGuidsByGuid) => analysisGroupFamilyGuidsByGuid[analysisGroupGuid], ) From dfb7c8f2028c0a03148c7a3baec77b4d9cd2a82c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 13:08:22 -0400 Subject: [PATCH 031/736] family info for analysis group search context --- seqr/views/apis/variant_search_api.py | 18 ++++++++----- seqr/views/utils/orm_to_json_utils.py | 25 +++++++++++-------- .../filters/ProjectFamiliesFilter.jsx | 2 +- ui/shared/utils/constants.js | 4 ++- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/seqr/views/apis/variant_search_api.py b/seqr/views/apis/variant_search_api.py index 1f7e5447f4..dc9bd67ca5 100644 --- a/seqr/views/apis/variant_search_api.py +++ b/seqr/views/apis/variant_search_api.py @@ -7,6 +7,7 @@ from django.core.exceptions import 
MultipleObjectsReturned, PermissionDenied from django.db.utils import IntegrityError from django.db.models import Q, F, Value +from django.db.models.functions import JSONObject from math import ceil from reference_data.models import GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38 @@ -21,7 +22,7 @@ from seqr.views.utils.json_to_orm_utils import update_model_from_json, get_or_create_model_from_json, \ create_model_from_json from seqr.views.utils.orm_to_json_utils import get_json_for_saved_variants_with_tags, get_json_for_saved_search,\ - get_json_for_saved_searches, add_individual_hpo_details, FAMILY_DISPLAY_NAME_EXPR + get_json_for_saved_searches, add_individual_hpo_details, FAMILY_ADDITIONAL_VALUES from seqr.views.utils.permissions_utils import check_project_permissions, get_project_guids_user_can_view, \ user_is_analyst, login_and_policies_required, check_user_created_object_permissions, check_projects_view_permission from seqr.views.utils.project_context_utils import get_projects_child_entities @@ -382,14 +383,19 @@ def search_context_handler(request): response['familiesByGuid'] = {f['familyGuid']: f for f in Family.objects.filter(project__in=projects).values( projectGuid=Value(project_guid) if project_guid else F('project__guid'), familyGuid=F('guid'), - displayName=FAMILY_DISPLAY_NAME_EXPR, analysisStatus=F('analysis_status'), + **FAMILY_ADDITIONAL_VALUES, )} - project_dataset_types = get_search_samples(projects).values('individual__family__project__guid').annotate( - dataset_types=ArrayAgg('dataset_type', distinct=True)) - for agg in project_dataset_types: - response['projectsByGuid'][agg['individual__family__project__guid']]['datasetTypes'] = agg['dataset_types'] + family_sample_types = get_search_samples(projects).values('individual__family__guid').annotate( + samples=ArrayAgg(JSONObject(sampleType='sample_type', datasetType='dataset_type', isActive=Value(True)), distinct=True)) + project_dataset_types = defaultdict(set) + for agg in family_sample_types: + 
family = response['familiesByGuid'][agg['individual__family__guid']] + family['sampleTypes'] = agg['samples'] + project_dataset_types[family['projectGuid']].update([s['datasetType'] for s in agg['samples']]) + for project_guid, dataset_types in project_dataset_types.items(): + response['projectsByGuid'][project_guid]['datasetTypes'] = list(dataset_types) project_category_guid = context.get('projectCategoryGuid') if project_category_guid: diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 0c9f5df448..fd6d277917 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -215,23 +215,26 @@ def _get_case_review_fields(model_cls, has_case_review_perm): FAMILY_DISPLAY_NAME_EXPR = Coalesce(NullIf('display_name', Value('')), 'family_id') +FAMILY_ADDITIONAL_VALUES = { + 'analysedBy': ArrayAgg(JSONObject( + createdBy=_user_expr('familyanalysedby__created_by'), + dataType='familyanalysedby__data_type', + lastModifiedDate='familyanalysedby__last_modified_date', + ), filter=Q(familyanalysedby__isnull=False)), + 'assignedAnalyst': Case( + When(assigned_analyst__isnull=False, then=JSONObject( + fullName=_full_name_expr('assigned_analyst'), email=F('assigned_analyst__email'), + )), default=Value(None), + ), + 'displayName': FAMILY_DISPLAY_NAME_EXPR, +} def _get_json_for_families(families, user=None, add_individual_guids_field=False, project_guid=None, is_analyst=None, has_case_review_perm=False, additional_values=None): family_additional_values = { - 'analysedBy': ArrayAgg(JSONObject( - createdBy=_user_expr('familyanalysedby__created_by'), - dataType='familyanalysedby__data_type', - lastModifiedDate='familyanalysedby__last_modified_date', - ), filter=Q(familyanalysedby__isnull=False)), - 'assignedAnalyst': Case( - When(assigned_analyst__isnull=False, then=JSONObject( - fullName=_full_name_expr('assigned_analyst'), email=F('assigned_analyst__email'), - )), default=Value(None), - ), - 'displayName': 
FAMILY_DISPLAY_NAME_EXPR, + **FAMILY_ADDITIONAL_VALUES, 'pedigreeImage': NullIf(Concat(Value(MEDIA_URL), 'pedigree_image', output_field=CharField()), Value(MEDIA_URL)), } if additional_values: diff --git a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx index 290873ae56..35a8ad4321 100644 --- a/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx +++ b/ui/pages/Search/components/filters/ProjectFamiliesFilter.jsx @@ -149,7 +149,7 @@ const mapStateToProps = (state, ownProps) => ({ const mapDispatchToProps = (dispatch, ownProps) => { const onLoadSuccess = (state) => { - const newVal = getProjectFamilies(state)(ownProps.value) + const newVal = getProjectFamilies(state, ownProps.value)(ownProps.value) if (newVal && newVal !== ownProps.value) { ownProps.onChange(newVal) } diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 2a453c0f59..b9f8599165 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -306,7 +306,9 @@ const SHOW_ANALYSED = 'SHOW_ANALYSED' const SHOW_NOT_ANALYSED = 'SHOW_NOT_ANALYSED' const hasMatchingSampleFilter = isMatchingSample => (family, user, samplesByFamily) => ( - (samplesByFamily[family.familyGuid] || []).some(sample => sample.isActive && isMatchingSample(sample))) + (family.sampleTypes || samplesByFamily[family.familyGuid] || []).some( + sample => sample.isActive && isMatchingSample(sample), + )) export const ASSIGNED_TO_ME_FILTER = { value: SHOW_ASSIGNED_TO_ME, From ea7414fa22d28c24de28e731b97e1903022ed5e7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 13:38:26 -0400 Subject: [PATCH 032/736] fix projectGuid prop --- ui/redux/selectors.js | 1 + 1 file changed, 1 insertion(+) diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 734ead8c8b..db076a4937 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -529,6 +529,7 @@ export const getProjectAnalysisGroupFamilyGuidsByGuid 
= createSelector( familyPassesFilters, (state, props) => ( state.currentProjectGuid || + props.projectGuid || props.value?.projectGuid || props.match?.params?.projectGuid || props.match?.params?.entityGuid From aa09a2f3db2710a72ed5075919e9f4e1f8951800 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 14:43:03 -0400 Subject: [PATCH 033/736] update search context tests --- seqr/fixtures/1kg_project.json | 26 +++++++++++++++++++++ seqr/views/apis/variant_search_api_tests.py | 16 +++++++++---- seqr/views/utils/test_utils.py | 1 + 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 6a79f0df4c..ff9b601f52 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -2448,6 +2448,32 @@ "families": [3] } }, +{ + "model": "seqr.dynamicanalysisgroup", + "pk": 1, + "fields": { + "guid": "DAG0000001_unsolved", + "created_date": "2024-02-09T18:53:24.207Z", + "created_by": null, + "last_modified_date": "2024-02-09T18:53:24.207Z", + "name": "Unsolved", + "project": null, + "criteria": {"firstSample": ["SHOW_DATA_LOADED"], "analysisStatus": ["I", "P", "C", "Rncc", "Rcpc"]} + } +}, +{ + "model": "seqr.dynamicanalysisgroup", + "pk": 2, + "fields": { + "guid": "DAG0000002_my_new_cases", + "created_date": "2024-02-09T18:53:24.207Z", + "created_by": null, + "last_modified_date": "2024-03-09T18:53:24.207Z", + "name": "My New Cases", + "project": 1, + "criteria": {"analysedBy": ["SHOW_ASSIGNED_TO_ME", "SHOW_NOT_ANALYSED"], "analysisStatus": ["I"]} + } +}, { "model": "matchmaker.matchmakersubmission", "pk": 1, diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 811cb807bc..87f3a6fd43 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -16,7 +16,7 @@ from seqr.views.utils.test_utils import AuthenticationTestCase, VARIANTS, AnvilAuthenticationTestCase,\ 
GENE_VARIANT_FIELDS, GENE_VARIANT_DISPLAY_FIELDS, LOCUS_LIST_FIELDS, FAMILY_FIELDS, \ PA_LOCUS_LIST_FIELDS, INDIVIDUAL_FIELDS, FUNCTIONAL_FIELDS, IGV_SAMPLE_FIELDS, FAMILY_NOTE_FIELDS, ANALYSIS_GROUP_FIELDS, \ - VARIANT_NOTE_FIELDS, TAG_FIELDS, MATCHMAKER_SUBMISSION_FIELDS, SAVED_VARIANT_DETAIL_FIELDS + VARIANT_NOTE_FIELDS, TAG_FIELDS, MATCHMAKER_SUBMISSION_FIELDS, SAVED_VARIANT_DETAIL_FIELDS, DYNAMIC_ANALYSIS_GROUP_FIELDS LOCUS_LIST_GUID = 'LL00049_pid_genes_autosomal_do' PROJECT_GUID = 'R0001_1kg' @@ -133,7 +133,7 @@ }, 'projectsByGuid': {PROJECT_GUID: mock.ANY}, 'familiesByGuid': mock.ANY, - 'analysisGroupsByGuid': {'AG0000183_test_group': mock.ANY, 'AG0000185_accepted': mock.ANY}, + 'analysisGroupsByGuid': {'AG0000183_test_group': mock.ANY, 'AG0000185_accepted': mock.ANY, 'DAG0000001_unsolved': mock.ANY, 'DAG0000002_my_new_cases': mock.ANY}, 'locusListsByGuid': {LOCUS_LIST_GUID: mock.ANY, 'LL00005_retina_proteome': mock.ANY}, } @@ -178,11 +178,17 @@ def _assert_expected_search_context(self, response_json): locus_list_fields.remove('canEdit') self.assertSetEqual(set(response_json['locusListsByGuid'][LOCUS_LIST_GUID].keys()), locus_list_fields) self.assertSetEqual(set(response_json['analysisGroupsByGuid']['AG0000183_test_group'].keys()), ANALYSIS_GROUP_FIELDS) + self.assertSetEqual(set(response_json['analysisGroupsByGuid']['DAG0000001_unsolved'].keys()), DYNAMIC_ANALYSIS_GROUP_FIELDS) self.assertEqual(len(response_json['familiesByGuid']), 11) - self.assertSetEqual(set(response_json['familiesByGuid']['F000001_1'].keys()), {'projectGuid', 'familyGuid', 'displayName', 'analysisStatus'}) - self.assertEqual(response_json['familiesByGuid']['F000001_1']['displayName'], '1') - self.assertEqual(response_json['familiesByGuid']['F000001_1']['analysisStatus'], 'Q') + self.assertSetEqual(set(response_json['familiesByGuid']['F000001_1'].keys()), { + 'projectGuid', 'familyGuid', 'displayName', 'analysisStatus', 'analysedBy', 'assignedAnalyst', 'sampleTypes', + }) + 
self.assertDictEqual(response_json['familiesByGuid']['F000001_1'], { + 'projectGuid': PROJECT_GUID, 'familyGuid': 'F000001_1', 'displayName': '1', 'analysisStatus': 'Q', + 'assignedAnalyst': None, 'sampleTypes': [{'datasetType': 'SNV_INDEL', 'sampleType': 'WES', 'isActive': True}], + 'analysedBy': [{'createdBy': 'Test No Access User', 'dataType': 'SNP', 'lastModifiedDate': '2022-07-22T19:27:08.563+00:00'}], + }) def _assert_expected_rnaseq_response(self, response_json): self.assertDictEqual( diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 441354347a..9ce88a67a7 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -730,6 +730,7 @@ def _get_list_param(call, param): } ANALYSIS_GROUP_FIELDS = {'analysisGroupGuid', 'description', 'name', 'projectGuid', 'familyGuids'} +DYNAMIC_ANALYSIS_GROUP_FIELDS = {'analysisGroupGuid', 'criteria', 'name', 'projectGuid'} FAMILY_FIELDS = { 'projectGuid', 'familyGuid', 'analysedBy', 'pedigreeImage', 'familyId', 'displayName', 'description', From ef42bee47374a9e9092355e1ec0801a656e67c6b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 14:49:57 -0400 Subject: [PATCH 034/736] update project tests --- seqr/views/apis/project_api_tests.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/project_api_tests.py b/seqr/views/apis/project_api_tests.py index fe10392c22..16068f5bd8 100644 --- a/seqr/views/apis/project_api_tests.py +++ b/seqr/views/apis/project_api_tests.py @@ -16,7 +16,7 @@ PROJECT_FIELDS, LOCUS_LIST_FIELDS, PA_LOCUS_LIST_FIELDS, NO_INTERNAL_CASE_REVIEW_INDIVIDUAL_FIELDS, \ SAMPLE_FIELDS, FAMILY_FIELDS, INTERNAL_FAMILY_FIELDS, INTERNAL_INDIVIDUAL_FIELDS, INDIVIDUAL_FIELDS, TAG_TYPE_FIELDS, \ CASE_REVIEW_FAMILY_FIELDS, FAMILY_NOTE_FIELDS, MATCHMAKER_SUBMISSION_FIELDS, ANALYSIS_GROUP_FIELDS, \ - EXT_WORKSPACE_NAMESPACE, EXT_WORKSPACE_NAME + EXT_WORKSPACE_NAMESPACE, EXT_WORKSPACE_NAME, 
DYNAMIC_ANALYSIS_GROUP_FIELDS PROJECT_GUID = 'R0001_1kg' EMPTY_PROJECT_GUID = 'R0002_empty' @@ -485,10 +485,20 @@ def test_project_analysis_groups(self): response_json = response.json() response_keys = {'analysisGroupsByGuid'} self.assertSetEqual(set(response_json.keys()), response_keys) - self.assertEqual(len(response_json['analysisGroupsByGuid']), 2) + self.assertEqual(len(response_json['analysisGroupsByGuid']), 4) self.assertSetEqual( - set(next(iter(response_json['analysisGroupsByGuid'].values())).keys()), ANALYSIS_GROUP_FIELDS + set(response_json['analysisGroupsByGuid']['AG0000183_test_group'].keys()), ANALYSIS_GROUP_FIELDS ) + self.assertSetEqual( + set(response_json['analysisGroupsByGuid']['DAG0000002_my_new_cases'].keys()), DYNAMIC_ANALYSIS_GROUP_FIELDS + ) + + response = self.client.get(url.replace(PROJECT_GUID, DEMO_PROJECT_GUID)) + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'analysisGroupsByGuid': {'DAG0000001_unsolved': { + 'analysisGroupGuid': 'DAG0000001_unsolved', 'projectGuid': None, 'name': 'Unsolved', + 'criteria': {'firstSample': ['SHOW_DATA_LOADED'], 'analysisStatus': ['I', 'P', 'C', 'Rncc', 'Rcpc']}, + }}}) def test_project_locus_lists(self): url = reverse(project_locus_lists, args=[PROJECT_GUID]) From d804734dcd64379ba2640e440424bfadd2606b26 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 15:06:53 -0400 Subject: [PATCH 035/736] test analysis groups --- seqr/views/apis/analysis_group_api_tests.py | 57 ++++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/analysis_group_api_tests.py b/seqr/views/apis/analysis_group_api_tests.py index dc4fc43267..385bd11ac9 100644 --- a/seqr/views/apis/analysis_group_api_tests.py +++ b/seqr/views/apis/analysis_group_api_tests.py @@ -2,8 +2,9 @@ from django.urls.base import reverse -from seqr.models import AnalysisGroup -from seqr.views.apis.analysis_group_api import update_analysis_group_handler, 
delete_analysis_group_handler +from seqr.models import AnalysisGroup, DynamicAnalysisGroup +from seqr.views.apis.analysis_group_api import update_analysis_group_handler, delete_analysis_group_handler, \ + update_dynamic_analysis_group_handler, delete_dynamic_analysis_group_handler from seqr.views.utils.test_utils import AuthenticationTestCase PROJECT_GUID = 'R0001_1kg' @@ -82,3 +83,55 @@ def test_create_update_and_delete_analysis_group(self): # check that analysis_group was deleted new_analysis_group = AnalysisGroup.objects.filter(guid=guid) self.assertEqual(len(new_analysis_group), 0) + + def test_create_update_and_delete_dynamic_analysis_group(self): + create_analysis_group_url = reverse(update_dynamic_analysis_group_handler, args=[PROJECT_GUID]) + self.check_manager_login(create_analysis_group_url) + + # send invalid requests to create analysis_group + response = self.client.post(create_analysis_group_url, content_type='application/json', data=json.dumps({})) + self.assertEqual(response.status_code, 400) + self.assertEqual(response.reason_phrase, 'Missing required field(s): Name, Criteria') + + # send valid request to create analysis_group + response = self.client.post(create_analysis_group_url, content_type='application/json', data=json.dumps({ + 'name': 'new_dynamic_group', 'criteria': {'analysisStatus': ['Q']}, + })) + self.assertEqual(response.status_code, 200) + new_analysis_group_response = response.json() + self.assertEqual(len(new_analysis_group_response['analysisGroupsByGuid']), 1) + new_analysis_group = next(iter(new_analysis_group_response['analysisGroupsByGuid'].values())) + self.assertEqual(new_analysis_group['name'], 'new_dynamic_group') + + guid = new_analysis_group['analysisGroupGuid'] + new_analysis_group_model = DynamicAnalysisGroup.objects.filter(guid=guid).first() + self.assertIsNotNone(new_analysis_group_model) + self.assertEqual(new_analysis_group_model.name, new_analysis_group['name']) + + # update the analysis_group + 
update_analysis_group_url = reverse(update_dynamic_analysis_group_handler, args=[PROJECT_GUID, guid]) + response = self.client.post(update_analysis_group_url, content_type='application/json', data=json.dumps( + {'name': 'updated_analysis_group', 'criteria': {'analysisStatus': ['I']}})) + + self.assertEqual(response.status_code, 200) + updated_analysis_group_response = response.json() + self.assertEqual(len(updated_analysis_group_response['analysisGroupsByGuid']), 1) + updated_analysis_group = next(iter(updated_analysis_group_response['analysisGroupsByGuid'].values())) + self.assertEqual(updated_analysis_group['name'], 'updated_analysis_group') + self.assertDictEqual(updated_analysis_group['criteria'], {'analysisStatus': ['I']}) + + updated_analysis_group_model = DynamicAnalysisGroup.objects.filter(guid=guid).first() + self.assertIsNotNone(updated_analysis_group_model) + self.assertEqual(updated_analysis_group_model.name, updated_analysis_group['name']) + self.assertEqual(updated_analysis_group_model.criteria, updated_analysis_group['criteria']) + + # delete the analysis_group + delete_analysis_group_url = reverse(delete_dynamic_analysis_group_handler, args=[PROJECT_GUID, guid]) + response = self.client.post(delete_analysis_group_url, content_type='application/json') + + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'analysisGroupsByGuid': {guid: None}}) + + # check that analysis_group was deleted + new_analysis_group = DynamicAnalysisGroup.objects.filter(guid=guid) + self.assertEqual(len(new_analysis_group), 0) From 7ba4bdac3718f9896b1cd00a02596d087360d726 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 15:08:02 -0400 Subject: [PATCH 036/736] clean up unreachable code --- seqr/views/apis/analysis_group_api.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index d71a3f60ee..ccd0aebf80 100644 --- 
a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -78,20 +78,13 @@ def update_dynamic_analysis_group_handler(request, project_guid, analysis_group_ @login_and_policies_required -def delete_analysis_group_handler(request, project_guid, analysis_group_guid, model_cls=AnalysisGroup, validate_can_delete=lambda x: None): +def delete_analysis_group_handler(request, project_guid, analysis_group_guid, model_cls=AnalysisGroup): project = get_project_and_check_permissions(project_guid, request.user, can_edit=True) - analysis_group = model_cls.objects.get(guid=analysis_group_guid, project=project) - error = validate_can_delete(analysis_group) - if error: - raise error - analysis_group.delete_model(request.user, user_can_delete=True) + model_cls.objects.get(guid=analysis_group_guid, project=project).delete_model(request.user, user_can_delete=True) return create_json_response({'analysisGroupsByGuid': {analysis_group_guid: None}}) @login_and_policies_required def delete_dynamic_analysis_group_handler(request, project_guid, analysis_group_guid): - return delete_analysis_group_handler( - request, project_guid, analysis_group_guid, model_cls=DynamicAnalysisGroup, - validate_can_delete=lambda analysis_group: None if analysis_group.project_id else 'Cannot delete shared analysis group', - ) + return delete_analysis_group_handler(request, project_guid, analysis_group_guid, model_cls=DynamicAnalysisGroup) From c1fa4ca60579426e34a758638fe0fc69992115b4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 15:11:02 -0400 Subject: [PATCH 037/736] fix ui tests --- ui/pages/Search/selectors.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/pages/Search/selectors.test.js b/ui/pages/Search/selectors.test.js index ffb055f09f..efdebd7577 100644 --- a/ui/pages/Search/selectors.test.js +++ b/ui/pages/Search/selectors.test.js @@ -27,7 +27,7 @@ test('getIntitialSearch', () => { ) expect(getIntitialSearch( - NO_SEARCH_STATE, { 
match: { params: { analysisGroupGuid: ANALYSIS_GROUP_GUID } } }) + NO_SEARCH_STATE, { match: { params: { projectGuid: PROJECT_GUID, analysisGroupGuid: ANALYSIS_GROUP_GUID } } }) ).toEqual(EXPECTED_INITAL_SEARCH) expect(getIntitialSearch(NO_SEARCH_STATE, { match: { params: { analysisGroupGuid: 'foo' } } })).toEqual( { projectFamilies: [{ analysisGroupGuid: 'foo' }] } From 45d42aeb1ac6b18c0cad62d7773cc3f24656408a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 15:15:01 -0400 Subject: [PATCH 038/736] bum changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd16765048..5164fd92bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Adds dynamic analysis groups (REQUIRES DB MIGRATION) ## 4/4/24 * Add ability to import project metadata from gregor metadata From 1780d67da4f4fcb12aed0097aba0eab97f778d6c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 16:30:01 -0400 Subject: [PATCH 039/736] add ui tests --- ui/pages/Search/fixtures.js | 15 +++++++++++++++ ui/pages/Search/selectors.test.js | 6 +++++- ui/redux/selectors.test.js | 14 +++++++++++++- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/ui/pages/Search/fixtures.js b/ui/pages/Search/fixtures.js index 4ad3b1cd94..4e8dc6a22a 100644 --- a/ui/pages/Search/fixtures.js +++ b/ui/pages/Search/fixtures.js @@ -3,6 +3,7 @@ export const PROJECT_GUID = 'R0237_1000_genomes_demo' export const FAMILY_GUID = 'F011652_1' export const ANALYSIS_GROUP_GUID = 'AG0000183_test_group' +export const DYNAMIC_ANALYSIS_GROUP_GUID = 'DAG0000183_test' export const SEARCH_HASH = 'd380ed0fd28c3127d07a64ea2ba907d7' export const GENE_ID = 'ENSG00000228198' export const SEARCH = { projectFamilies: [{ projectGuid: PROJECT_GUID, familyGuid: FAMILY_GUID}], search: {} } @@ -215,6 +216,20 @@ export const STATE = { name: "Test Group", projectGuid:PROJECT_GUID, }, + [DYNAMIC_ANALYSIS_GROUP_GUID]: { + analysisGroupGuid: 
DYNAMIC_ANALYSIS_GROUP_GUID, + createdDate: '2018-08-09T18:53:24.207Z', + name: 'Test Dynamic Group', + projectGuid: null, + criteria: { analysisStatus: ['Rncc', 'Rcpc'], analysedBy: ['SHOW_NOT_ANALYSED'] }, + }, + DAG0000184_test_2: { + analysisGroupGuid: 'DAG0000184_test_2', + createdDate: '2018-08-09T18:53:24.207Z', + name: 'Test Dynamic Group', + projectGuid: PROJECT_GUID, + criteria: { firstSample: ['SHOW_DATA_LOADED'], analysisStatus: ['I', 'P', 'C'] }, + }, }, locusListsByGuid: { [LOCUS_LIST_GUID]: LOCUS_LIST }, rnaSeqDataByIndividual: { I021474_na19679: { diff --git a/ui/pages/Search/selectors.test.js b/ui/pages/Search/selectors.test.js index efdebd7577..407e9e243a 100644 --- a/ui/pages/Search/selectors.test.js +++ b/ui/pages/Search/selectors.test.js @@ -1,7 +1,7 @@ import { getProjectDatasetTypes } from 'redux/selectors' import { getIntitialSearch, getLocusListOptions, getDatasetTypes } from './selectors' -import { STATE, SEARCH_HASH, SEARCH, PROJECT_GUID, FAMILY_GUID, ANALYSIS_GROUP_GUID, LOCUS_LIST } from './fixtures' +import { STATE, SEARCH_HASH, SEARCH, PROJECT_GUID, FAMILY_GUID, ANALYSIS_GROUP_GUID, DYNAMIC_ANALYSIS_GROUP_GUID, LOCUS_LIST } from './fixtures' const NO_SEARCH_STATE = { ...STATE, currentSearchHash: null } const EXPECTED_INITAL_SEARCH = { projectFamilies: [{ projectGuid: PROJECT_GUID, familyGuids: [FAMILY_GUID] }] } @@ -32,6 +32,10 @@ test('getIntitialSearch', () => { expect(getIntitialSearch(NO_SEARCH_STATE, { match: { params: { analysisGroupGuid: 'foo' } } })).toEqual( { projectFamilies: [{ analysisGroupGuid: 'foo' }] } ) + + expect(getIntitialSearch( + NO_SEARCH_STATE, { match: { params: { projectGuid: PROJECT_GUID, analysisGroupGuid: DYNAMIC_ANALYSIS_GROUP_GUID } } }) + ).toEqual(EXPECTED_INITAL_SEARCH) }) test('getLocusListOptions', () => { diff --git a/ui/redux/selectors.test.js b/ui/redux/selectors.test.js index 294fb06713..e8aeef6ab9 100644 --- a/ui/redux/selectors.test.js +++ b/ui/redux/selectors.test.js @@ -8,8 +8,9 @@ import { 
getUserOptions, getLocusListIntervalsByChromProject, getSpliceOutliersByChromFamily, + getProjectAnalysisGroupFamilyGuidsByGuid, } from './selectors' -import {FAMILY_GUID, GENE_ID, SEARCH, SEARCH_HASH, STATE} from "../pages/Search/fixtures"; +import {DYNAMIC_ANALYSIS_GROUP_GUID, FAMILY_GUID, GENE_ID, SEARCH, SEARCH_HASH, STATE} from "../pages/Search/fixtures"; test('getVariantTagNotesByByFamilyVariants', () => { const tagsNotesByGuid = getVariantTagNotesByFamilyVariants( @@ -85,3 +86,14 @@ test('getSpliceOutliersByChromFamily', () => { } }) }) + +test('getProjectAnalysisGroupFamilyGuidsByGuid', () => { + expect(getProjectAnalysisGroupFamilyGuidsByGuid(STATE, { projectGuid: 'R0237_1000_genomes_demo' })).toEqual({ + AG0000183_test_group: ['F011652_1'], + DAG0000183_test: ['F011652_1'], + DAG0000184_test_2: [], + }) + expect(getProjectAnalysisGroupFamilyGuidsByGuid(STATE, {})).toEqual({ + DAG0000183_test: [], + }) +}) From 57e5a6a134213de92bb6a6f583e6a1359e7f6772 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 19 Apr 2024 16:54:05 -0400 Subject: [PATCH 040/736] remove unused import --- seqr/views/apis/analysis_group_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index ccd0aebf80..8147158321 100644 --- a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -1,4 +1,3 @@ -from django.core.exceptions import PermissionDenied import json from seqr.models import AnalysisGroup, DynamicAnalysisGroup, Family From b1b55a17c2b9b7cdc4c793c59500cb4d03622b0c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 22 Apr 2024 11:44:05 -0400 Subject: [PATCH 041/736] update ui to support snv indel --- ui/pages/DataManagement/components/LoadData.jsx | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ui/pages/DataManagement/components/LoadData.jsx b/ui/pages/DataManagement/components/LoadData.jsx index 33af8e3fcb..7a06bc8f6f 100644 --- 
a/ui/pages/DataManagement/components/LoadData.jsx +++ b/ui/pages/DataManagement/components/LoadData.jsx @@ -5,7 +5,13 @@ import { validators } from 'shared/components/form/FormHelpers' import FormWizard from 'shared/components/form/FormWizard' import { ButtonRadioGroup } from 'shared/components/form/Inputs' import LoadOptionsSelect from 'shared/components/form/LoadOptionsSelect' -import { SAMPLE_TYPE_EXOME, SAMPLE_TYPE_GENOME, DATASET_TYPE_SV_CALLS, DATASET_TYPE_MITO_CALLS } from 'shared/utils/constants' +import { + SAMPLE_TYPE_EXOME, + SAMPLE_TYPE_GENOME, + DATASET_TYPE_SV_CALLS, + DATASET_TYPE_MITO_CALLS, + DATASET_TYPE_SNV_INDEL_CALLS, +} from 'shared/utils/constants' const formatProjectOption = ({ name, projectGuid, dataTypeLastLoaded }) => ({ value: projectGuid, @@ -54,7 +60,11 @@ const LOAD_DATA_PAGES = [ name: 'datasetType', label: 'Dataset Type', component: ButtonRadioGroup, - options: [DATASET_TYPE_SV_CALLS, DATASET_TYPE_MITO_CALLS].map(value => ({ value, text: value })), + options: [ + DATASET_TYPE_SNV_INDEL_CALLS, + DATASET_TYPE_SV_CALLS, + DATASET_TYPE_MITO_CALLS, + ].map(value => ({ value, text: value.replace('_', '/') })), validate: validators.required, }, ], From 04068a401a53a3b9b1255c87785a09ac10c701c9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 22 Apr 2024 11:57:57 -0400 Subject: [PATCH 042/736] allow new project loading for snv indel --- seqr/views/apis/data_manager_api.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index a9706a8e02..65ca847417 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -424,9 +424,14 @@ def validate_callset(request): @pm_or_data_manager_required def get_loaded_projects(request, sample_type, dataset_type): - projects = get_internal_projects().filter( - family__individual__sample__sample_type=sample_type, is_demo=False, - ).distinct().order_by('name').values('name', 
projectGuid=F('guid'), dataTypeLastLoaded=Max( + projects = get_internal_projects().filter(is_demo=False) + if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: + exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS + projects = projects.exclude(family__individual__sample__sample_type=exclude_sample_type) + # TODO filter for loadable projects from airtable + else: + projects = projects.filter(family__individual__sample__sample_type=sample_type) + projects = projects.distinct().order_by('name').values('name', projectGuid=F('guid'), dataTypeLastLoaded=Max( 'family__individual__sample__loaded_date', filter=Q(family__individual__sample__dataset_type=dataset_type), )) return create_json_response({'projects': list(projects)}) From 7ec9f19899e63fce93a022c864427d3846812e52 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 24 Apr 2024 18:05:54 -0400 Subject: [PATCH 043/736] panelapp use genes api --- panelapp/pa_locus_list_api_tests.py | 19 +- panelapp/panelapp_utils.py | 25 ++- .../test_resources/au_panel_3069_genes.json | 187 ------------------ ..._260_genes.json => au_panelapp_genes.json} | 182 ++++++++++++++++- ..._260_genes.json => uk_panelapp_genes.json} | 0 5 files changed, 208 insertions(+), 205 deletions(-) delete mode 100644 panelapp/test_resources/au_panel_3069_genes.json rename panelapp/test_resources/{au_panel_260_genes.json => au_panelapp_genes.json} (52%) rename panelapp/test_resources/{uk_panel_260_genes.json => uk_panelapp_genes.json} (100%) diff --git a/panelapp/pa_locus_list_api_tests.py b/panelapp/pa_locus_list_api_tests.py index 8ceecd7740..8b46eddbed 100644 --- a/panelapp/pa_locus_list_api_tests.py +++ b/panelapp/pa_locus_list_api_tests.py @@ -55,22 +55,21 @@ def test_import_all_panels(self): # Given all PanelApp gene lists and associated genes au_panels_p1_url = '{}/panels/?page=1'.format(PANEL_APP_API_URL_AU) au_panels_p2_url = '{}/panels/?page=2'.format(PANEL_APP_API_URL_AU) - 
uk_panels_p1_url = '{}/panels/?page=1'.format(PANEL_APP_API_URL_UK) - au_genes_260_url = '{}/panels/{}/genes/?page=1'.format(PANEL_APP_API_URL_AU, 260) - au_genes_3069_url = '{}/panels/{}/genes/?page=1'.format(PANEL_APP_API_URL_AU, 3069) - uk_genes_260_url = '{}/panels/{}/genes/?page=1'.format(PANEL_APP_API_URL_UK, 260) + au_genes_url = '{}/genes/?page=1'.format(PANEL_APP_API_URL_AU) au_panels_p1_json = _get_json_from_file('panelapp/test_resources/au_panelapp_panels_p1.json') au_panels_p2_json = _get_json_from_file('panelapp/test_resources/au_panelapp_panels_p2.json') + au_genes_json = _get_json_from_file('panelapp/test_resources/au_panelapp_genes.json') + + uk_panels_p1_url = '{}/panels/?page=1'.format(PANEL_APP_API_URL_UK) + uk_genes_url = '{}/genes/?page=1'.format(PANEL_APP_API_URL_UK) uk_panels_p1_json = _get_json_from_file('panelapp/test_resources/uk_panelapp_panels_p1.json') - au_genes_260_json = _get_json_from_file('panelapp/test_resources/au_panel_260_genes.json') - au_genes_3069_json = _get_json_from_file('panelapp/test_resources/au_panel_3069_genes.json') - uk_genes_260_json = _get_json_from_file('panelapp/test_resources/uk_panel_260_genes.json') + uk_genes_json = _get_json_from_file('panelapp/test_resources/uk_panelapp_genes.json') + responses.add(responses.GET, au_panels_p1_url, json=au_panels_p1_json, status=200) responses.add(responses.GET, au_panels_p2_url, json=au_panels_p2_json, status=200) + responses.add(responses.GET, au_genes_url, json=au_genes_json, status=200) responses.add(responses.GET, uk_panels_p1_url, json=uk_panels_p1_json, status=200) - responses.add(responses.GET, au_genes_260_url, json=au_genes_260_json, status=200) - responses.add(responses.GET, au_genes_3069_url, json=au_genes_3069_json, status=200) - responses.add(responses.GET, uk_genes_260_url, json=uk_genes_260_json, status=200) + responses.add(responses.GET, uk_genes_url, json=uk_genes_json, status=200) # URl argument is required with self.assertRaises(CommandError) as err: 
diff --git a/panelapp/panelapp_utils.py b/panelapp/panelapp_utils.py index 2834772735..99803a3181 100644 --- a/panelapp/panelapp_utils.py +++ b/panelapp/panelapp_utils.py @@ -1,6 +1,9 @@ import requests from django.db import transaction from django.utils import timezone +from urllib3.util import Retry +from requests import Session +from requests.adapters import HTTPAdapter from panelapp.models import PaLocusList, PaLocusListGene from seqr.models import LocusList as SeqrLocusList, LocusListGene as SeqrLocusListGene @@ -23,9 +26,11 @@ def _extract_ensembl_id_from_json(raw_gene_json): return None panels_url = '{}/panels/?page=1'.format(panel_app_api_url) - all_panels = _get_all_panels(panels_url, []) + genes_url = '{}/genes/?page=1'.format(panel_app_api_url) + genes_by_panel_id = _get_all_genes(genes_url, {}) + for panel in all_panels: panel_app_id = panel.get('id') logger.info('Importing panel id {}'.format(panel_app_id), user) @@ -33,7 +38,7 @@ def _extract_ensembl_id_from_json(raw_gene_json): with transaction.atomic(): panel_genes_url = '{}/panels/{}/genes'.format(panel_app_api_url, panel_app_id) pa_locus_list = _create_or_update_locus_list_from_panel(user, panel_genes_url, panel, label) - all_genes_for_panel = _get_all_genes_for_panel('{}/?page=1'.format(panel_genes_url), []) + all_genes_for_panel = genes_by_panel_id.get(panel_app_id, []) panel_genes_by_id = {_extract_ensembl_id_from_json(gene): gene for gene in all_genes_for_panel if _extract_ensembl_id_from_json(gene)} raw_ensbl_38_gene_ids_csv = ','.join(panel_genes_by_id.keys()) @@ -105,16 +110,22 @@ def _get_all_panels(panels_url, all_results): return _get_all_panels(next_page, all_results) -def _get_all_genes_for_panel(panel_genes_url, all_results): - resp = requests.get(panel_genes_url) +def _get_all_genes(genes_url: str, results_by_panel_id: dict): + resp = requests.get(genes_url) resp_json = resp.json() - all_results += resp_json.get('results', []) + + for result in resp_json.get('results', []): + if 
result.get('panel'): + panel_id = result['panel']['id'] + if panel_id not in results_by_panel_id: + results_by_panel_id[panel_id] = [] + results_by_panel_id[panel_id].append(result) next_page = resp_json.get('next', None) if next_page is None: - return all_results + return results_by_panel_id else: - return _get_all_genes_for_panel(next_page, all_results) + return _get_all_genes(next_page, results_by_panel_id) def _create_or_update_locus_list_from_panel(user, panelgenes_url, panel_json, label): diff --git a/panelapp/test_resources/au_panel_3069_genes.json b/panelapp/test_resources/au_panel_3069_genes.json deleted file mode 100644 index 494d0f17ab..0000000000 --- a/panelapp/test_resources/au_panel_3069_genes.json +++ /dev/null @@ -1,187 +0,0 @@ -{ - "count": 2, - "next": null, - "previous": null, - "results": [ - { - "gene_data": { - "alias": [ - "CMT2N", - "AlaRS" - ], - "biotype": "protein_coding", - "hgnc_id": "HGNC:20", - "gene_name": "alanyl-tRNA synthetase", - "omim_gene": [ - "601065" - ], - "alias_name": [ - "alanine tRNA ligase 1, cytoplasmic" - ], - "gene_symbol": "AARS", - "hgnc_symbol": "AARS", - "hgnc_release": "2017-11-03", - "ensembl_genes": { - "GRch37": { - "82": { - "location": "16:70286198-70323446", - "ensembl_id": "ENSG00000090861" - } - }, - "GRch38": { - "90": { - "location": "16:70252295-70289543", - "ensembl_id": "ENSG00000090861" - } - } - }, - "hgnc_date_symbol_changed": "1995-07-11" - }, - "entity_type": "gene", - "entity_name": "AARS", - "confidence_level": "3", - "penetrance": null, - "mode_of_pathogenicity": "", - "publications": [ - "20045102", - "22009580", - "22206013", - "30373780", - "26032230" - ], - "evidence": [ - "Expert Review Green", - "Royal Melbourne Hospital" - ], - "phenotypes": [ - "Charcot Marie Tooth disease, axonal, type 2N, 613287", - "HMSN, dHMN/dSMA" - ], - "mode_of_inheritance": "MONOALLELIC, autosomal or pseudoautosomal, imprinted status unknown", - "tags": [], - "panel": { - "id": 3069, - "hash_id": null, - 
"name": "Hereditary Neuropathy_CMT - isolated", - "disease_group": "Neurology and neurodevelopmental disorders", - "disease_sub_group": "", - "status": "public", - "version": "1.7", - "version_created": "2021-08-09T10:57:36.791182Z", - "relevant_disorders": [], - "stats": { - "number_of_genes": 106, - "number_of_strs": 0, - "number_of_regions": 0 - }, - "types": [ - { - "name": "Victorian Clinical Genetics Services", - "slug": "victorian-clinical-genetics-services", - "description": "Panel used by VCGS." - }, - { - "name": "Royal Melbourne Hospital", - "slug": "royal-melbourne-hospital", - "description": "Royal Melbourne Hospital" - }, - { - "name": "Rare Disease", - "slug": "rare-disease", - "description": "Rare disease panels" - } - ] - }, - "transcript": null - }, - { - "gene_data": { - "alias": [ - "KIAA0294", - "Gef10" - ], - "biotype": "protein_coding", - "hgnc_id": "HGNC:14103", - "gene_name": "Rho guanine nucleotide exchange factor 10", - "omim_gene": [ - "608136" - ], - "alias_name": null, - "gene_symbol": "ARHGEF10", - "hgnc_symbol": "ARHGEF10", - "hgnc_release": "2017-11-03", - "ensembl_genes": { - "GRch37": { - "82": { - "location": "8:1772142-1906807", - "ensembl_id": "ENSG00000104728" - } - }, - "GRch38": { - "90": { - "location": "8:1823976-1958641", - "ensembl_id": "ENSG00000104728" - } - } - }, - "hgnc_date_symbol_changed": "2000-12-01" - }, - "entity_type": "gene", - "entity_name": "ARHGEF10", - "confidence_level": "2", - "penetrance": null, - "mode_of_pathogenicity": "", - "publications": [ - "14508709", - "21719701", - "25025039", - "25275565", - "25091364" - ], - "evidence": [ - "Expert Review Amber", - "Royal Melbourne Hospital" - ], - "phenotypes": [ - "?Slowed nerve conduction velocity, AD, 608236", - "HMSN" - ], - "mode_of_inheritance": "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted", - "tags": [], - "panel": { - "id": 3069, - "hash_id": null, - "name": "Hereditary Neuropathy_CMT - isolated", - "disease_group": "Neurology and 
neurodevelopmental disorders", - "disease_sub_group": "", - "status": "public", - "version": "1.7", - "version_created": "2021-08-09T10:57:36.791182Z", - "relevant_disorders": [], - "stats": { - "number_of_genes": 106, - "number_of_strs": 0, - "number_of_regions": 0 - }, - "types": [ - { - "name": "Victorian Clinical Genetics Services", - "slug": "victorian-clinical-genetics-services", - "description": "Panel used by VCGS." - }, - { - "name": "Royal Melbourne Hospital", - "slug": "royal-melbourne-hospital", - "description": "Royal Melbourne Hospital" - }, - { - "name": "Rare Disease", - "slug": "rare-disease", - "description": "Rare disease panels" - } - ] - }, - "transcript": null - } - ] -} diff --git a/panelapp/test_resources/au_panel_260_genes.json b/panelapp/test_resources/au_panelapp_genes.json similarity index 52% rename from panelapp/test_resources/au_panel_260_genes.json rename to panelapp/test_resources/au_panelapp_genes.json index 91ca70264d..e9ac749f14 100644 --- a/panelapp/test_resources/au_panel_260_genes.json +++ b/panelapp/test_resources/au_panelapp_genes.json @@ -1,5 +1,5 @@ { - "count": 2, + "count": 4, "next": null, "previous": null, "results": [ @@ -189,6 +189,186 @@ ] }, "transcript": null + }, + { + "gene_data": { + "alias": [ + "CMT2N", + "AlaRS" + ], + "biotype": "protein_coding", + "hgnc_id": "HGNC:20", + "gene_name": "alanyl-tRNA synthetase", + "omim_gene": [ + "601065" + ], + "alias_name": [ + "alanine tRNA ligase 1, cytoplasmic" + ], + "gene_symbol": "AARS", + "hgnc_symbol": "AARS", + "hgnc_release": "2017-11-03", + "ensembl_genes": { + "GRch37": { + "82": { + "location": "16:70286198-70323446", + "ensembl_id": "ENSG00000090861" + } + }, + "GRch38": { + "90": { + "location": "16:70252295-70289543", + "ensembl_id": "ENSG00000090861" + } + } + }, + "hgnc_date_symbol_changed": "1995-07-11" + }, + "entity_type": "gene", + "entity_name": "AARS", + "confidence_level": "3", + "penetrance": null, + "mode_of_pathogenicity": "", + "publications": 
[ + "20045102", + "22009580", + "22206013", + "30373780", + "26032230" + ], + "evidence": [ + "Expert Review Green", + "Royal Melbourne Hospital" + ], + "phenotypes": [ + "Charcot Marie Tooth disease, axonal, type 2N, 613287", + "HMSN, dHMN/dSMA" + ], + "mode_of_inheritance": "MONOALLELIC, autosomal or pseudoautosomal, imprinted status unknown", + "tags": [], + "panel": { + "id": 3069, + "hash_id": null, + "name": "Hereditary Neuropathy_CMT - isolated", + "disease_group": "Neurology and neurodevelopmental disorders", + "disease_sub_group": "", + "status": "public", + "version": "1.7", + "version_created": "2021-08-09T10:57:36.791182Z", + "relevant_disorders": [], + "stats": { + "number_of_genes": 106, + "number_of_strs": 0, + "number_of_regions": 0 + }, + "types": [ + { + "name": "Victorian Clinical Genetics Services", + "slug": "victorian-clinical-genetics-services", + "description": "Panel used by VCGS." + }, + { + "name": "Royal Melbourne Hospital", + "slug": "royal-melbourne-hospital", + "description": "Royal Melbourne Hospital" + }, + { + "name": "Rare Disease", + "slug": "rare-disease", + "description": "Rare disease panels" + } + ] + }, + "transcript": null + }, + { + "gene_data": { + "alias": [ + "KIAA0294", + "Gef10" + ], + "biotype": "protein_coding", + "hgnc_id": "HGNC:14103", + "gene_name": "Rho guanine nucleotide exchange factor 10", + "omim_gene": [ + "608136" + ], + "alias_name": null, + "gene_symbol": "ARHGEF10", + "hgnc_symbol": "ARHGEF10", + "hgnc_release": "2017-11-03", + "ensembl_genes": { + "GRch37": { + "82": { + "location": "8:1772142-1906807", + "ensembl_id": "ENSG00000104728" + } + }, + "GRch38": { + "90": { + "location": "8:1823976-1958641", + "ensembl_id": "ENSG00000104728" + } + } + }, + "hgnc_date_symbol_changed": "2000-12-01" + }, + "entity_type": "gene", + "entity_name": "ARHGEF10", + "confidence_level": "2", + "penetrance": null, + "mode_of_pathogenicity": "", + "publications": [ + "14508709", + "21719701", + "25025039", + 
"25275565", + "25091364" + ], + "evidence": [ + "Expert Review Amber", + "Royal Melbourne Hospital" + ], + "phenotypes": [ + "?Slowed nerve conduction velocity, AD, 608236", + "HMSN" + ], + "mode_of_inheritance": "MONOALLELIC, autosomal or pseudoautosomal, NOT imprinted", + "tags": [], + "panel": { + "id": 3069, + "hash_id": null, + "name": "Hereditary Neuropathy_CMT - isolated", + "disease_group": "Neurology and neurodevelopmental disorders", + "disease_sub_group": "", + "status": "public", + "version": "1.7", + "version_created": "2021-08-09T10:57:36.791182Z", + "relevant_disorders": [], + "stats": { + "number_of_genes": 106, + "number_of_strs": 0, + "number_of_regions": 0 + }, + "types": [ + { + "name": "Victorian Clinical Genetics Services", + "slug": "victorian-clinical-genetics-services", + "description": "Panel used by VCGS." + }, + { + "name": "Royal Melbourne Hospital", + "slug": "royal-melbourne-hospital", + "description": "Royal Melbourne Hospital" + }, + { + "name": "Rare Disease", + "slug": "rare-disease", + "description": "Rare disease panels" + } + ] + }, + "transcript": null } ] } diff --git a/panelapp/test_resources/uk_panel_260_genes.json b/panelapp/test_resources/uk_panelapp_genes.json similarity index 100% rename from panelapp/test_resources/uk_panel_260_genes.json rename to panelapp/test_resources/uk_panelapp_genes.json From 3b082d0f86a38f7b21f29b3d68e6ddf18dd2c489 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 24 Apr 2024 18:14:15 -0400 Subject: [PATCH 044/736] clean imports --- panelapp/panelapp_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/panelapp/panelapp_utils.py b/panelapp/panelapp_utils.py index 99803a3181..cb5b6551f9 100644 --- a/panelapp/panelapp_utils.py +++ b/panelapp/panelapp_utils.py @@ -1,9 +1,6 @@ import requests from django.db import transaction from django.utils import timezone -from urllib3.util import Retry -from requests import Session -from requests.adapters import HTTPAdapter from panelapp.models 
import PaLocusList, PaLocusListGene from seqr.models import LocusList as SeqrLocusList, LocusListGene as SeqrLocusListGene From 07ed9e497483a1f989be6c921f8b707814021c2c Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 25 Apr 2024 00:08:56 -0400 Subject: [PATCH 045/736] add timeout --- panelapp/panelapp_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/panelapp/panelapp_utils.py b/panelapp/panelapp_utils.py index cb5b6551f9..e656a54093 100644 --- a/panelapp/panelapp_utils.py +++ b/panelapp/panelapp_utils.py @@ -10,6 +10,8 @@ logger = SeqrLogger(__name__) +REQUEST_TIMEOUT_S = 300 + def import_all_panels(user, panel_app_api_url, label=None): def _extract_ensembl_id_from_json(raw_gene_json): @@ -108,7 +110,7 @@ def _get_all_panels(panels_url, all_results): def _get_all_genes(genes_url: str, results_by_panel_id: dict): - resp = requests.get(genes_url) + resp = requests.get(genes_url, timeout=REQUEST_TIMEOUT_S) resp_json = resp.json() for result in resp_json.get('results', []): From e0611662c4b43a5812935ef6c3bbbbaf99bf8930 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 25 Apr 2024 12:03:21 -0400 Subject: [PATCH 046/736] move unshared utility function --- ui/pages/Search/components/PageHeader.jsx | 5 +++-- ui/pages/Search/constants.js | 4 ---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/ui/pages/Search/components/PageHeader.jsx b/ui/pages/Search/components/PageHeader.jsx index 205150aede..58b2b74ca0 100644 --- a/ui/pages/Search/components/PageHeader.jsx +++ b/ui/pages/Search/components/PageHeader.jsx @@ -5,7 +5,6 @@ import { connect } from 'react-redux' import { getProjectsByGuid, getFamiliesByGuid, getAnalysisGroupsByGuid, getSearchesByHash } from 'redux/selectors' import PageHeaderLayout from 'shared/components/page/PageHeaderLayout' import { snakecaseToTitlecase } from 'shared/utils/stringUtils' -import { getSelectedAnalysisGroups } from '../constants' const PAGE_CONFIGS = { project: (entityGuid, 
projectsByGuid) => ({ @@ -33,7 +32,9 @@ const PAGE_CONFIGS = { pageType = 'family' specificEntityGuid = familyGuids[0] // eslint-disable-line prefer-destructuring } else { - const analysisGroups = getSelectedAnalysisGroups(analysisGroupsByGuid, familyGuids) + const analysisGroups = Object.values(analysisGroupsByGuid).filter( + group => group.familyGuids?.every(familyGuid => familyGuids.includes(familyGuid)), + ) if (analysisGroups.length === 1 && analysisGroups[0].familyGuids.length === familyGuids.length) { pageType = 'analysis_group' specificEntityGuid = analysisGroups[0].analysisGroupGuid diff --git a/ui/pages/Search/constants.js b/ui/pages/Search/constants.js index b6134dd203..64b1da7276 100644 --- a/ui/pages/Search/constants.js +++ b/ui/pages/Search/constants.js @@ -3,10 +3,6 @@ import { DE_NOVO_FILTER, ANY_AFFECTED, INHERITANCE_FILTER_OPTIONS, } from 'shared/utils/constants' -export const getSelectedAnalysisGroups = (analysisGroupsByGuid, familyGuids) => Object.values( - analysisGroupsByGuid, -).filter(group => group.familyGuids?.every(familyGuid => familyGuids.includes(familyGuid))) - const REF_REF = 'ref_ref' const HAS_REF = 'has_ref' const REF_ALT = 'ref_alt' From 0b51e9a0c9c302c56acc4344e533d063c810d214 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 25 Apr 2024 12:39:16 -0400 Subject: [PATCH 047/736] include full project json in options --- seqr/views/apis/data_manager_api.py | 4 ++-- ui/pages/DataManagement/components/LoadData.jsx | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 65ca847417..9487b250f6 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -442,9 +442,9 @@ def load_data(request): request_json = json.loads(request.body) sample_type = request_json['sampleType'] dataset_type = request_json['datasetType'] - projects = request_json['projects'] + projects = [json.loads(project) for project in 
request_json['projects']] - project_models = Project.objects.filter(guid__in=projects) + project_models = Project.objects.filter(guid__in=[p['projectGuid'] for p in projects]) if len(project_models) < len(projects): missing = sorted(set(projects) - {p.guid for p in project_models}) return create_json_response({'error': f'The following projects are invalid: {", ".join(missing)}'}, status=400) diff --git a/ui/pages/DataManagement/components/LoadData.jsx b/ui/pages/DataManagement/components/LoadData.jsx index 7a06bc8f6f..96145c3cac 100644 --- a/ui/pages/DataManagement/components/LoadData.jsx +++ b/ui/pages/DataManagement/components/LoadData.jsx @@ -13,11 +13,11 @@ import { DATASET_TYPE_SNV_INDEL_CALLS, } from 'shared/utils/constants' -const formatProjectOption = ({ name, projectGuid, dataTypeLastLoaded }) => ({ - value: projectGuid, - text: name, - description: dataTypeLastLoaded && `Last Loaded: ${new Date(dataTypeLastLoaded).toLocaleDateString()}`, - color: dataTypeLastLoaded ? 'teal' : 'orange', +const formatProjectOption = opt => ({ + value: JSON.stringify(opt), + text: opt.name, + description: opt.dataTypeLastLoaded && `Last Loaded: ${new Date(opt.dataTypeLastLoaded).toLocaleDateString()}`, + color: opt.dataTypeLastLoaded ? 
'teal' : 'orange', }) const renderLabel = ({ color, text }) => ({ color, content: text }) From 34261fba4c5e35f68195a631b12407583a7353c6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 25 Apr 2024 13:40:41 -0400 Subject: [PATCH 048/736] fetch loadable samples from airtable --- seqr/views/apis/data_manager_api.py | 32 ++++++++++++++++++- .../DataManagement/components/LoadData.jsx | 5 ++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 9487b250f6..b4058431f1 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -20,6 +20,7 @@ from seqr.utils.vcf_utils import validate_vcf_exists from seqr.views.utils.airflow_utils import trigger_data_loading, write_data_loading_pedigree +from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.dataset_utils import load_rna_seq, load_phenotype_prioritization_data_file, RNA_DATA_TYPE_CONFIGS, \ post_process_rna_data from seqr.views.utils.file_utils import parse_file, get_temp_upload_directory, load_uploaded_file @@ -412,6 +413,11 @@ def write_pedigree(request, project_guid): Sample.DATASET_TYPE_SV_CALLS: ('.bed',), } +LOADABLE_PDO_STATUSES = [ + 'On hold for phenotips, but ready to load', + 'Methods (Loading)', +] + @pm_or_data_manager_required def validate_callset(request): @@ -425,18 +431,42 @@ def validate_callset(request): @pm_or_data_manager_required def get_loaded_projects(request, sample_type, dataset_type): projects = get_internal_projects().filter(is_demo=False) + project_samples = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: + project_samples = _fetch_airtable_loadable_project_samples(request.user) + projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS projects = 
projects.exclude(family__individual__sample__sample_type=exclude_sample_type) - # TODO filter for loadable projects from airtable else: projects = projects.filter(family__individual__sample__sample_type=sample_type) + projects = projects.distinct().order_by('name').values('name', projectGuid=F('guid'), dataTypeLastLoaded=Max( 'family__individual__sample__loaded_date', filter=Q(family__individual__sample__dataset_type=dataset_type), )) + + if project_samples: + for project in projects: + project['sampleIds'] = project_samples[project['projectGuid']] + return create_json_response({'projects': list(projects)}) +def _fetch_airtable_loadable_project_samples(user): + pdos = AirtableSession(user).fetch_records( + 'PDO', fields=['PassingCollaboratorSampleIDs', 'SeqrIDs', 'PDOName', 'PDOStatus', 'SeqrProjectURL'], + or_filters={'PDOStatus': LOADABLE_PDO_STATUSES} + ) + project_samples = defaultdict(set) + for pdo in pdos.values(): + project_guid = re.match( + 'https://seqr.broadinstitute.org/project/([^/]+)/project_page', pdo['SeqrProjectURL'], + ).group(1) + project_samples[project_guid].update([ + sample_id for sample_id in pdo['PassingCollaboratorSampleIDs'] + pdo['SeqrIDs'] if sample_id + ]) + return project_samples + + @pm_or_data_manager_required def load_data(request): request_json = json.loads(request.body) diff --git a/ui/pages/DataManagement/components/LoadData.jsx b/ui/pages/DataManagement/components/LoadData.jsx index 96145c3cac..d4a32cf448 100644 --- a/ui/pages/DataManagement/components/LoadData.jsx +++ b/ui/pages/DataManagement/components/LoadData.jsx @@ -16,7 +16,10 @@ import { const formatProjectOption = opt => ({ value: JSON.stringify(opt), text: opt.name, - description: opt.dataTypeLastLoaded && `Last Loaded: ${new Date(opt.dataTypeLastLoaded).toLocaleDateString()}`, + description: [ + opt.sampleIds && `${opt.sampleIds.length} Samples to Load`, + opt.dataTypeLastLoaded && `Last Loaded: ${new Date(opt.dataTypeLastLoaded).toLocaleDateString()}`, + 
].filter(val => val).join('; '), color: opt.dataTypeLastLoaded ? 'teal' : 'orange', }) From affe9558966b5e53764dc846cbe8a83443146357 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 25 Apr 2024 15:00:33 -0400 Subject: [PATCH 049/736] upload and validate sample subset --- seqr/views/apis/data_manager_api.py | 7 ++-- seqr/views/utils/airflow_utils.py | 60 ++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index b4058431f1..119412766f 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -473,17 +473,18 @@ def load_data(request): sample_type = request_json['sampleType'] dataset_type = request_json['datasetType'] projects = [json.loads(project) for project in request_json['projects']] + project_samples = {p['projectGuid']: p.get('sampleIds') for p in projects} - project_models = Project.objects.filter(guid__in=[p['projectGuid'] for p in projects]) + project_models = Project.objects.filter(guid__in=project_samples) if len(project_models) < len(projects): - missing = sorted(set(projects) - {p.guid for p in project_models}) + missing = sorted(set(project_samples.keys()) - {p.guid for p in project_models}) return create_json_response({'error': f'The following projects are invalid: {", ".join(missing)}'}, status=400) success_message = f'*{request.user.email}* triggered loading internal {sample_type} {dataset_type} data for {len(projects)} projects' trigger_data_loading( project_models, sample_type, dataset_type, request_json['filePath'], request.user, success_message, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, f'ERROR triggering internal {sample_type} {dataset_type} loading', - is_internal=True, + is_internal=True, project_samples=project_samples if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS else None, ) return create_json_response({'success': True}) diff --git a/seqr/views/utils/airflow_utils.py 
b/seqr/views/utils/airflow_utils.py index af3e01146c..e24e835d29 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -1,6 +1,6 @@ from collections import defaultdict, OrderedDict from django.contrib.auth.models import User -from django.db.models import F +from django.db.models import F, Q, Count import google.auth from google.auth.transport.requests import AuthorizedSession import itertools @@ -12,6 +12,7 @@ from seqr.utils.communication_utils import safe_post_to_slack from seqr.utils.file_utils import does_file_exist from seqr.utils.logging_utils import SeqrLogger +from seqr.utils.middleware import ErrorsWarningsException from seqr.views.utils.export_utils import write_multiple_files_to_gs from settings import AIRFLOW_WEBSERVER_URL, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL @@ -27,7 +28,7 @@ class DagRunningException(Exception): def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type: str, data_path: str, user: User, success_message: str, success_slack_channel: str, error_message: str, - genome_version: str = GENOME_VERSION_GRCh38, is_internal: bool = False): + genome_version: str = GENOME_VERSION_GRCh38, is_internal: bool = False, project_samples: dict = None): success = True dag_name = f'v03_pipeline-{_dag_dataset_type(sample_type, dataset_type)}' @@ -40,7 +41,8 @@ def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type 'reference_genome': GENOME_VERSION_LOOKUP[genome_version], } - upload_info = _upload_data_loading_files(projects, is_internal, user, genome_version, sample_type) + upload_info = _upload_data_loading_files( + projects, is_internal, user, genome_version, sample_type, project_samples=project_samples) try: _check_dag_running_state(dag_name) @@ -106,7 +108,8 @@ def _dag_dataset_type(sample_type: str, dataset_type: str): def _upload_data_loading_files(projects: list[Project], is_internal: bool, - user: User, genome_version: str, sample_type: str, callset: str = 
'Internal'): + user: User, genome_version: str, sample_type: str, callset: str = 'Internal', + project_samples: dict = None): file_annotations = OrderedDict({ 'Project_GUID': F('family__project__guid'), 'Family_GUID': F('family__guid'), 'Family_ID': F('family__family_id'), @@ -114,6 +117,8 @@ def _upload_data_loading_files(projects: list[Project], is_internal: bool, 'Paternal_ID': F('father__individual_id'), 'Maternal_ID': F('mother__individual_id'), 'Sex': F('sex'), }) annotations = {'project': F('family__project__guid'), **file_annotations} + if project_samples: + annotations['sampleCount'] = Count('sample', filter=Q(sample__is_active=True) & Q(sample__sample_type=sample_type)) data = Individual.objects.filter(family__project__in=projects).order_by('family_id', 'individual_id').values( **dict(annotations)) @@ -122,18 +127,61 @@ def _upload_data_loading_files(projects: list[Project], is_internal: bool, data_by_project[row.pop('project')].append(row) info = [] + errors = [] for project_guid, rows in data_by_project.items(): gs_path = _get_dag_project_gs_path(project_guid, genome_version, sample_type, is_internal, callset) try: - write_multiple_files_to_gs( - [(f'{project_guid}_pedigree', file_annotations.keys(), rows)], gs_path, user, file_format='tsv') + files, file_suffixes = _parse_project_upload_files(project_guid, rows, file_annotations.keys(), project_samples) + write_multiple_files_to_gs(files, gs_path, user, file_format='tsv', file_suffixes=file_suffixes) + except ValueError as e: + errors.append(str(e)) except Exception as e: logger.error(f'Uploading Pedigree to Google Storage failed. 
Errors: {e}', user, detail=rows) info.append(f'Pedigree file has been uploaded to {gs_path}') + if errors: + raise ErrorsWarningsException(errors) + return info +def _parse_project_upload_files(project_guid, rows, header, project_samples): + files = [(f'{project_guid}_pedigree', header, rows)] + file_suffixes = None + if project_samples and project_guid in project_samples: + _validate_project_samples(project_samples[project_guid], rows) + file_name = f'{project_guid}_ids' + files.append((file_name, ['s'], [{'s': sample_id} for sample_id in project_samples[project_guid]])) + file_suffixes = {file_name: 'txt'} + return files, file_suffixes + + +def _validate_project_samples(sample_ids, pedigree_rows): + individual_families = {} + loaded_family_individuals = defaultdict(set) + for row in pedigree_rows: + individual_id = row['Individual_ID'] + family_id = row['Family_ID'] + individual_families[individual_id] = family_id + if row['sampleCount']: + loaded_family_individuals[family_id].add(individual_id) + + missing_samples = sorted(set(sample_ids) - set(individual_families.keys())) + if missing_samples: + raise ValueError(f'The following samples are included in airtable but missing from seqr: {", ".join(missing_samples)}') + + airtable_families = defaultdict(set) + for sample_id in sample_ids: + airtable_families[individual_families[sample_id]].add(sample_id) + family_errors = [] + for family_id, family_samples in airtable_families.items(): + missing_family_samples = sorted(loaded_family_individuals[family_id] - family_samples) + if missing_family_samples: + family_errors.append(f'{family_id} ({", ".join(missing_family_samples)})') + if family_errors: + raise ValueError(f'The following families have previously loaded samples absent from airtable: {"; ".join(family_errors)}') + + def _get_dag_project_gs_path(project: str, genome_version: str, sample_type: str, is_internal: bool, callset: str): dag_name = f'RDG_{sample_type}_Broad_{callset}' if is_internal else 
f'AnVIL_{sample_type}' dag_path = f'{SEQR_DATASETS_GS_PATH}/{GENOME_VERSION_LOOKUP[genome_version]}/{dag_name}' From 3110c52e9b1313647db7a6a76150753b171df133 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 25 Apr 2024 15:07:36 -0400 Subject: [PATCH 050/736] use defaultdict --- panelapp/panelapp_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/panelapp/panelapp_utils.py b/panelapp/panelapp_utils.py index e656a54093..3ec5ca8504 100644 --- a/panelapp/panelapp_utils.py +++ b/panelapp/panelapp_utils.py @@ -1,3 +1,5 @@ +from collections import defaultdict + import requests from django.db import transaction from django.utils import timezone @@ -28,7 +30,7 @@ def _extract_ensembl_id_from_json(raw_gene_json): all_panels = _get_all_panels(panels_url, []) genes_url = '{}/genes/?page=1'.format(panel_app_api_url) - genes_by_panel_id = _get_all_genes(genes_url, {}) + genes_by_panel_id = _get_all_genes(genes_url, defaultdict(list)) for panel in all_panels: panel_app_id = panel.get('id') @@ -116,8 +118,6 @@ def _get_all_genes(genes_url: str, results_by_panel_id: dict): for result in resp_json.get('results', []): if result.get('panel'): panel_id = result['panel']['id'] - if panel_id not in results_by_panel_id: - results_by_panel_id[panel_id] = [] results_by_panel_id[panel_id].append(result) next_page = resp_json.get('next', None) From e49a3f6e99393f012a433699e5dde972cdd30f07 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 25 Apr 2024 16:07:38 -0400 Subject: [PATCH 051/736] fix unit tests --- seqr/views/apis/data_manager_api_tests.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 307210a8b8..2263132c56 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -394,6 +394,7 @@ ['R0001_1kg', 'F000002_2', '2', 'HG00731', 'HG00732', 'HG00733', 'F'], ] +PROJECT_OPTION = 
{'dataTypeLastLoaded': '2018-02-05T06:31:55.397Z', 'name': 'Non-Analyst Project', 'projectGuid': 'R0004_non_analyst_project'} @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') class DataManagerAPITest(AuthenticationTestCase): @@ -1315,15 +1316,11 @@ def test_get_loaded_projects(self): response = self.client.get(url) self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'projects': [ - {'dataTypeLastLoaded': '2018-02-05T06:31:55.397Z', 'name': 'Non-Analyst Project', 'projectGuid': 'R0004_non_analyst_project'}, - ]}) + self.assertDictEqual(response.json(), {'projects': [PROJECT_OPTION]}) response = self.client.get(url.replace('SV', 'MITO')) self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'projects': [ - {'dataTypeLastLoaded': None, 'name': 'Non-Analyst Project', 'projectGuid': 'R0004_non_analyst_project'}, - ]}) + self.assertDictEqual(response.json(), {'projects': [{**PROJECT_OPTION, 'dataTypeLastLoaded': None}]}) # test data manager access self.login_data_manager_user() @@ -1360,7 +1357,7 @@ def test_load_data(self, mock_subprocess, mock_temp_dir, mock_open): mock_subprocess.return_value.wait.return_value = 0 mock_subprocess.return_value.communicate.return_value = b'', b'File not found' body = {'filePath': 'gs://test_bucket/mito_callset.mt', 'datasetType': 'MITO', 'sampleType': 'WGS', 'projects': [ - 'R0001_1kg', 'R0004_non_analyst_project', 'R0005_not_project', + json.dumps({'projectGuid': 'R0001_1kg'}), json.dumps(PROJECT_OPTION), json.dumps({'projectGuid': 'R0005_not_project'}), ]} response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 400) From 502e2cd9a61d3bf6097c5a7d36a11c47be471db0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 26 Apr 2024 13:06:04 -0400 Subject: [PATCH 052/736] test snv indel project options --- seqr/fixtures/users.json | 2 +- seqr/views/apis/data_manager_api.py | 
4 +- seqr/views/apis/data_manager_api_tests.py | 71 +++++++++++++++++++++-- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/seqr/fixtures/users.json b/seqr/fixtures/users.json index dd84250532..90810e96d3 100644 --- a/seqr/fixtures/users.json +++ b/seqr/fixtures/users.json @@ -161,7 +161,7 @@ "username": "test_data_manager", "first_name": "Test Data Manager", "last_name": "", - "email": "test_data_manager@test.com", + "email": "test_data_manager@broadinstitute.org", "is_staff": true, "is_active": true, "date_joined": "2017-03-12T23:09:54.180Z", diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 119412766f..43d9ca5607 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -446,14 +446,14 @@ def get_loaded_projects(request, sample_type, dataset_type): if project_samples: for project in projects: - project['sampleIds'] = project_samples[project['projectGuid']] + project['sampleIds'] = sorted(project_samples[project['projectGuid']]) return create_json_response({'projects': list(projects)}) def _fetch_airtable_loadable_project_samples(user): pdos = AirtableSession(user).fetch_records( - 'PDO', fields=['PassingCollaboratorSampleIDs', 'SeqrIDs', 'PDOName', 'PDOStatus', 'SeqrProjectURL'], + 'PDO', fields=['PassingCollaboratorSampleIDs', 'SeqrIDs', 'SeqrProjectURL'], or_filters={'PDOStatus': LOADABLE_PDO_STATUSES} ) project_samples = defaultdict(set) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 2263132c56..22ed96dc97 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -10,7 +10,7 @@ update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, write_pedigree, validate_callset, \ get_loaded_projects, load_data from seqr.views.utils.orm_to_json_utils import _get_json_for_models -from seqr.views.utils.test_utils import AuthenticationTestCase, 
AirflowTestCase +from seqr.views.utils.test_utils import AuthenticationTestCase, AirflowTestCase, AirtableTest from seqr.utils.search.elasticsearch.es_utils_tests import urllib3_responses from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier, Sample, Project, PhenotypePrioritization from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL @@ -394,10 +394,51 @@ ['R0001_1kg', 'F000002_2', '2', 'HG00731', 'HG00732', 'HG00733', 'F'], ] -PROJECT_OPTION = {'dataTypeLastLoaded': '2018-02-05T06:31:55.397Z', 'name': 'Non-Analyst Project', 'projectGuid': 'R0004_non_analyst_project'} +PROJECT_OPTION = { + 'dataTypeLastLoaded': None, + 'name': 'Non-Analyst Project', + 'projectGuid': 'R0004_non_analyst_project', +} +PROJECT_SAMPLES_OPTION = {**PROJECT_OPTION, 'sampleIds': ['NA21234', 'NA21987', 'NA21988']} +EMPTY_PROJECT_OPTION = { + 'dataTypeLastLoaded': None, + 'name': 'Empty Project', + 'projectGuid': 'R0002_empty', + 'sampleIds': ['HG00738', 'HG00739'], +} + +AIRTABLE_PDO_RECORDS = { + 'records': [ + { + 'id': 'recW24C2CJW5lT64K', + 'fields': { + 'SeqrProjectURL': 'https://seqr.broadinstitute.org/project/R0002_empty/project_page', + 'PassingCollaboratorSampleIDs': ['HG00738', None], + 'SeqrIDs': [None, 'HG00739'], + } + }, + { + 'id': 'rec2B6OGmQpAkQW3s', + 'fields': { + 'SeqrProjectURL': 'https://seqr.broadinstitute.org/project/R0004_non_analyst_project/project_page', + 'PassingCollaboratorSampleIDs': ['NA21234', 'NA21987'], + 'SeqrIDs': [None, None], + } + }, + { + 'id': 'rec2Nkg10N1KssPc3', + 'fields': { + 'SeqrProjectURL': 'https://seqr.broadinstitute.org/project/R0004_non_analyst_project/project_page', + 'PassingCollaboratorSampleIDs': [None], + 'SeqrIDs': ['NA21988'], + } + }, + ] +} + @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') -class DataManagerAPITest(AuthenticationTestCase): +class DataManagerAPITest(AuthenticationTestCase, AirtableTest): fixtures = ['users', '1kg_project', 'reference_data'] 
@mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @@ -1310,23 +1351,43 @@ def test_validate_callset(self, mock_subprocess): response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 200) + @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated', lambda x: True) + @responses.activate def test_get_loaded_projects(self): url = reverse(get_loaded_projects, args=['WGS', 'SV']) self.check_pm_login(url) response = self.client.get(url) self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'projects': [PROJECT_OPTION]}) + self.assertDictEqual(response.json(), {'projects': [{**PROJECT_OPTION, 'dataTypeLastLoaded': '2018-02-05T06:31:55.397Z'}]}) response = self.client.get(url.replace('SV', 'MITO')) self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'projects': [{**PROJECT_OPTION, 'dataTypeLastLoaded': None}]}) + self.assertDictEqual(response.json(), {'projects': [PROJECT_OPTION]}) # test data manager access self.login_data_manager_user() response = self.client.get(url) self.assertEqual(response.status_code, 200) + # test with airtable filter + responses.add( + responses.GET, 'https://api.airtable.com/v0/app3Y97xtbbaOopVR/PDO', json=AIRTABLE_PDO_RECORDS, status=200, + ) + snv_indel_url = url.replace('SV', 'SNV_INDEL') + response = self.client.get(snv_indel_url) + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'projects': [EMPTY_PROJECT_OPTION, PROJECT_SAMPLES_OPTION]}) + self.assert_expected_airtable_call( + call_index=0, filter_formula="OR(PDOStatus='Methods (Loading)',PDOStatus='On hold for phenotips, but ready to load')", + fields=['PassingCollaboratorSampleIDs', 'SeqrIDs', 'SeqrProjectURL'], + ) + + # test projects with no data loaded are returned for any sample type + response = self.client.get(snv_indel_url.replace('WGS', 'WES')) + 
self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'projects': [EMPTY_PROJECT_OPTION]}) + @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') class LoadDataAPITest(AirflowTestCase): From b06e953c5d2e7f2e4e07a22b4c18c8aaf88a2bf8 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 26 Apr 2024 13:08:32 -0400 Subject: [PATCH 053/736] fix test --- .../tests/detect_inactive_priveleged_users_tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/seqr/management/tests/detect_inactive_priveleged_users_tests.py b/seqr/management/tests/detect_inactive_priveleged_users_tests.py index 267586047f..503b37661d 100644 --- a/seqr/management/tests/detect_inactive_priveleged_users_tests.py +++ b/seqr/management/tests/detect_inactive_priveleged_users_tests.py @@ -31,17 +31,17 @@ def test_command(self, mock_datetime, mock_logger, mock_send_mail): call_command('detect_inactive_privileged_users') self.assertFalse(User.objects.get(email='test_superuser@test.com').is_active) - self.assertTrue(User.objects.get(email='test_data_manager@test.com').is_active) + self.assertTrue(User.objects.get(email='test_data_manager@broadinstitute.org').is_active) mock_send_mail.assert_has_calls([ - mock.call('Warning: seqr account deactivation', WARNING_EMAIL, None, ['test_data_manager@test.com']), + mock.call('Warning: seqr account deactivation', WARNING_EMAIL, None, ['test_data_manager@broadinstitute.org']), mock.call('Warning: seqr account deactivated', DEACTIVATED_EMAIL, None, ['test_superuser@test.com']), ]) mock_logger.error.assert_called_with('Unable to send email: Connection error') mock_logger.info.assert_has_calls([ mock.call('Checking for inactive users'), - mock.call('Warning test_data_manager@test.com of impending account inactivation'), + mock.call('Warning test_data_manager@broadinstitute.org of impending account inactivation'), mock.call('Inactivating account for test_superuser@test.com'), 
mock.call('Inactive user check complete'), ]) From 7720e39373d92712cc98cf609f489be05a535054 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 26 Apr 2024 17:03:35 -0400 Subject: [PATCH 054/736] test trigger load with subset --- seqr/views/apis/data_manager_api_tests.py | 46 +++++++++++++++++++---- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 22ed96dc97..bf0b39608f 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1485,19 +1485,51 @@ def test_load_data(self, mock_subprocess, mock_temp_dir, mock_open): """ self.mock_slack.assert_called_once_with(SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, error_message) - def _has_expected_gs_calls(self, mock_subprocess, mock_open, sample_type='WGS', **kwargs): + # Test loading with sample subset + mock_open.reset_mock() + mock_subprocess.reset_mock() + body.update({'datasetType': 'SNV_INDEL', 'sampleType': 'WGS', 'projects': [json.dumps(PROJECT_SAMPLES_OPTION)]}) + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), { + 'warnings': None, + 'errors': ['The following samples are included in airtable but missing from seqr: NA21988'], + }) + + sample_ids = PROJECT_SAMPLES_OPTION['sampleIds'] + body['projects'] = [json.dumps({**PROJECT_OPTION, 'sampleIds': [sample_ids[1]]})] + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), { + 'warnings': None, + 'errors': ['The following families have previously loaded samples absent from airtable: 14 (NA21234)'], + }) + + body['projects'] = [json.dumps({**PROJECT_OPTION, 'sampleIds': sample_ids[:2]})] + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + 
self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'success': True}) + self._has_expected_gs_calls(mock_subprocess, mock_open, has_project_subset=True) + + def _has_expected_gs_calls(self, mock_subprocess, mock_open, sample_type='WGS', has_project_subset=False, **kwargs): + projects = self.PROJECTS[1:] if has_project_subset else self.PROJECTS mock_open.assert_has_calls([ - mock.call(f'/mock/tmp/{project}_pedigree.tsv', 'w') for project in self.PROJECTS + mock.call(f'/mock/tmp/{project}_pedigree.tsv', 'w') for project in projects ], any_order=True) files = [ [row.split('\t') for row in write_call.args[0].split('\n')] for write_call in mock_open.return_value.__enter__.return_value.write.call_args_list ] self.assertEqual(len(files), 2) - self.assertEqual(len(files[0]), 15) - self.assertListEqual(files[0][:5], [PEDIGREE_HEADER] + EXPECTED_PEDIGREE_ROWS) - self.assertEqual(len(files[1]), 3) - self.assertListEqual(files[1], [ + if has_project_subset: + self.assertEqual(len(files[1]), 3) + self.assertListEqual(files[1], [['s'], ['NA21234'], ['NA21987']]) + else: + self.assertEqual(len(files[0]), 15) + self.assertListEqual(files[0][:5], [PEDIGREE_HEADER] + EXPECTED_PEDIGREE_ROWS) + ped_file = files[0 if has_project_subset else 1] + self.assertEqual(len(ped_file), 3) + self.assertListEqual(ped_file, [ PEDIGREE_HEADER, ['R0004_non_analyst_project', 'F000014_14', '14', 'NA21234', '', '', 'F'], ['R0004_non_analyst_project', 'F000014_14', '14', 'NA21987', '', '', 'M'], @@ -1507,5 +1539,5 @@ def _has_expected_gs_calls(self, mock_subprocess, mock_open, sample_type='WGS', mock.call( f'gsutil mv /mock/tmp/* gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project}/', stdout=-1, stderr=-2, shell=True, # nosec - ) for project in self.PROJECTS + ) for project in projects ], any_order=True) From 0a8b1370db59ac55bb2bb920dd6486c7f9d5e389 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 29 Apr 2024 10:40:33 -0400 
Subject: [PATCH 055/736] add gene field to manual sv form --- .../components/CreateVariantButton.jsx | 26 +++++++++++-------- ui/pages/Project/components/FamilyPage.jsx | 4 +-- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/ui/pages/Project/components/CreateVariantButton.jsx b/ui/pages/Project/components/CreateVariantButton.jsx index a74280cd5d..707b064a36 100644 --- a/ui/pages/Project/components/CreateVariantButton.jsx +++ b/ui/pages/Project/components/CreateVariantButton.jsx @@ -115,6 +115,15 @@ const POS_FIELD = { const START_FIELD = { name: 'pos', label: 'Start Position', ...POS_FIELD } const END_FIELD = { name: 'end', label: 'Stop Position', ...POS_FIELD } +const GENE_FIELD = { + name: GENE_ID_FIELD_NAME, + label: 'Gene', + control: AwesomeBarFormInput, + categories: ['genes'], + fluid: true, + placeholder: 'Search for gene', +} + const SAVED_VARIANT_FIELD = { name: VARIANTS_FIELD_NAME, idField: 'variantGuid', @@ -147,16 +156,7 @@ const SNV_FIELDS = [ { ...END_FIELD, validate: null }, { name: 'ref', label: 'Ref', validate: validators.required, width: 4 }, { name: 'alt', label: 'Alt', validate: validators.required, width: 4 }, - { - name: GENE_ID_FIELD_NAME, - label: 'Gene', - validate: validators.required, - control: AwesomeBarFormInput, - categories: ['genes'], - fluid: true, - width: 8, - placeholder: 'Search for gene', - }, + { ...GENE_FIELD, width: 8, validate: validators.required }, { name: TRANSCRIPT_ID_FIELD_NAME, label: 'Transcript ID', width: 6 }, { name: HGVSC_FIELD_NAME, label: 'HGVSC', width: 5, validate: validateHasTranscriptId }, { name: HGVSP_FIELD_NAME, label: 'HGVSP', width: 5, validate: validateHasTranscriptId }, @@ -181,6 +181,7 @@ const SV_FIELDS = [ CHROM_FIELD, START_FIELD, END_FIELD, + GENE_FIELD, GENOME_FIELD, TAG_FIELD, { name: SV_FIELD_NAME, validate: validators.required, label: 'SV Name', width: 8 }, @@ -205,7 +206,7 @@ const SV_FIELDS = [ max: 12, }, }, -].map(formatField) +].map(formatField).map(field => 
(field.validate ? { ...field, label: `${field.label}*` } : field)) const BaseCreateVariantButton = React.memo(({ variantType, family, user, ...props }) => ( user.isAnalyst ? ( @@ -246,6 +247,9 @@ const mapDispatchToProps = (dispatch, ownProps) => ({ if (variant.svName) { variant.variantId = values.svName + if (values[GENE_ID_FIELD_NAME]) { + variant.transcripts = { [values[GENE_ID_FIELD_NAME]]: [] } + } } else { variant.variantId = `${values.chrom}-${values.pos}-${values.ref}-${values.alt}` variant.transcripts = { diff --git a/ui/pages/Project/components/FamilyPage.jsx b/ui/pages/Project/components/FamilyPage.jsx index 3f5865df68..1dee86481a 100644 --- a/ui/pages/Project/components/FamilyPage.jsx +++ b/ui/pages/Project/components/FamilyPage.jsx @@ -25,7 +25,7 @@ import { getCurrentProject, getFamilyVariantSummaryLoading, getFamilyTagTypeCounts, } from '../selectors' import IndividualRow from './FamilyTable/IndividualRow' -import CreateVariantButton from './CreateVariantButton' +import CreateVariantButtons from './CreateVariantButton' import VariantTagTypeBar from './VariantTagTypeBar' import RnaSeqResultPage from './RnaSeqResultPage' @@ -86,7 +86,7 @@ const BaseVariantDetail = ( /> )} - + {project.isMmeEnabled && ( From 73c7af51d7669876d9a9b128ebe532e49315ed2e Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 29 Apr 2024 15:52:35 -0400 Subject: [PATCH 056/736] update model and add notification --- .../0064_alter_phenotypeprioritization.py | 27 +++++++++++++++++++ seqr/models.py | 5 +++- seqr/utils/search/add_data_utils.py | 21 ++++++++++++++- seqr/views/apis/data_manager_api.py | 13 ++++++--- 4 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 seqr/migrations/0064_alter_phenotypeprioritization.py diff --git a/seqr/migrations/0064_alter_phenotypeprioritization.py b/seqr/migrations/0064_alter_phenotypeprioritization.py new file mode 100644 index 0000000000..90c07ca116 --- /dev/null +++ b/seqr/migrations/0064_alter_phenotypeprioritization.py @@ 
-0,0 +1,27 @@ +# Generated by Django 3.2.25 on 2024-04-29 15:57 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('seqr', '0063_dynamicanalysisgroup'), + ] + + operations = [ + migrations.AddField( + model_name='phenotypeprioritization', + name='created_by', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL), + ), + migrations.AddField( + model_name='phenotypeprioritization', + name='created_date', + field=models.DateTimeField(db_index=True, default=django.utils.timezone.now), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index aa89945fd6..b919d29320 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1205,8 +1205,11 @@ class PhenotypePrioritization(BulkOperationBase): disease_name = models.TextField() scores = models.JSONField() + created_date = models.DateTimeField(default=timezone.now, db_index=True) + created_by = models.ForeignKey(User, null=True, blank=True, related_name='+', on_delete=models.SET_NULL) + def __unicode__(self): return "%s:%s:%s" % (self.individual.individual_id, self.gene_id, self.disease_id) class Meta: - json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'disease_name', 'scores'] + json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'disease_name', 'scores', 'created_date', 'created_by'] diff --git a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py index 91366a6c74..2b800815b6 100644 --- a/seqr/utils/search/add_data_utils.py +++ b/seqr/utils/search/add_data_utils.py @@ -9,6 +9,9 @@ SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL +BASE_EMAIL_TEMPLATE = 'Dear seqr user,\n\n{}\n\nAll the best,\nThe seqr team' + + def _hail_backend_error(*args, **kwargs): raise ValueError('Adding samples is disabled for the 
hail backend') @@ -87,6 +90,22 @@ def notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sa send_project_notification( project, notification=f'Loaded {sample_summary}', - email_body=f'Dear seqr user,\n\n{email}\n\nAll the best,\nThe seqr team', + email_body=BASE_EMAIL_TEMPLATE.format(email), subject='New data available in seqr', ) + + +def notify_phenotype_prioritization_loaded(project, tool, num_samples, file_path, user): + url = f'{BASE_URL}project/{project.guid}/project_page' + project_link = f'{project.name}' + email = ( + f'This is to notify you that {tool.title()} data for {num_samples} samples ' + f'has been loaded in seqr project {project_link} by {user.get_full_name()}' + ) + + send_project_notification( + project, + notification=f'Loaded {tool.title()} data from {file_path} for {num_samples} samples', + email_body=BASE_EMAIL_TEMPLATE.format(email), + subject=f'New {tool.title()} data available in seqr', + ) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index a9706a8e02..81183b8f0e 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -14,6 +14,7 @@ from django.views.decorators.csrf import csrf_exempt from requests.exceptions import ConnectionError as RequestConnectionError +from seqr.utils.search.add_data_utils import notify_phenotype_prioritization_loaded from seqr.utils.search.utils import get_search_backend_status, delete_search_backend_data from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.logging_utils import SeqrLogger @@ -356,7 +357,7 @@ def load_phenotype_prioritization_data(request): if missing_info or conflict_info: return create_json_response({'error': missing_info + conflict_info}, status=400) - all_records = [] + all_records_by_project_name = {} to_delete = PhenotypePrioritization.objects.none() error = None for project_name, records_by_indiv in data_by_project_indiv_id.items(): @@ -380,7 +381,7 @@ def 
load_phenotype_prioritization_data(request): info.append(f'Project {project_name}: {delete_info}loaded {len(indiv_records)} record(s)') to_delete |= exist_records - all_records += indiv_records + all_records_by_project_name[project_name] = indiv_records if error: return create_json_response({'error': error}, status=400) @@ -388,7 +389,13 @@ def load_phenotype_prioritization_data(request): if to_delete: PhenotypePrioritization.bulk_delete(request.user, to_delete) - PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data) for data in all_records]) + all_records = [record for indiv_records in all_records_by_project_name.values() for record in indiv_records] + PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data, created_by=user) for data in all_records]) + + for project_name, indiv_records in all_records_by_project_name.items(): + project = projects_by_name[project_name][0] + num_samples = len(indiv_records) + notify_phenotype_prioritization_loaded(project, tool, num_samples, file_path, request.user) return create_json_response({ 'info': info, From 104ec47af5e4c40a543d865a87768635db05ce01 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 1 May 2024 15:33:52 -0400 Subject: [PATCH 057/736] filter ui works --- seqr/views/apis/project_api.py | 4 ++++ seqr/views/utils/orm_to_json_utils.py | 4 ++++ ui/pages/Project/components/PhenotypePrioritizedGenes.jsx | 1 + ui/shared/utils/constants.js | 5 +++++ 4 files changed, 14 insertions(+) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 76860eccd9..40003061b1 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -197,6 +197,10 @@ def project_families(request, project_guid): JSONObject(paternalGuid='individual__father__guid', maternalGuid='individual__mother__guid'), filter=Q(individual__mother__isnull=False) | Q(individual__father__isnull=False), distinct=True, ), + phenotypePrioritizationTools=ArrayAgg( + 
'individual__phenotypeprioritization__tool', distinct=True, + filter=Q(individual__phenotypeprioritization__tool__isnull=False), + ), ) families = _get_json_for_families( family_models, request.user, has_case_review_perm=has_case_review_permissions(project, request.user), diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index fd6d277917..95bd18ed22 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -296,6 +296,10 @@ def _get_json_for_individuals(individuals, user=None, project_guid=None, family_ 'maternalId': F('mother__individual_id'), 'paternalId': F('father__individual_id'), 'displayName': INDIVIDUAL_DISPLAY_NAME_EXPR, + # 'phenotypePrioritizationTools': ArrayAgg( + # 'phenotypeprioritization__tool', + # filter=Q(phenotypeprioritization__tool__isnull=False) + # ) } if add_sample_guids_field: additional_values.update({ diff --git a/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx b/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx index 6eeae095a0..d087a2185d 100644 --- a/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx +++ b/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx @@ -58,6 +58,7 @@ const PHENOTYPE_GENE_INFO_COLUMNS = [ const BasePhenotypePriGenes = React.memo(( { individualGuid, phenotypeGeneScores, familyGuid, loading, load }, ) => ( + // TODO might have to make a change here sampleType !== SAMPLE_TYPE_RNA && datasetType === dataType, ), })), + { + value: `${SHOW_DATA_LOADED}_PHENO`, + name: 'Data Loaded - Phenotype Prioritization', + createFilter: (family, user, samplesByFamily) => family.phenotypePrioritizationTools.length > 0, + }, ], } From 2f0c989644e38f9db09fae63872eebddf94edaf9 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 1 May 2024 16:25:42 -0400 Subject: [PATCH 058/736] show in sample fields --- seqr/views/utils/orm_to_json_utils.py | 12 ++++++++---- .../Project/components/FamilyTable/IndividualRow.jsx | 8 
++++++-- .../Project/components/PhenotypePrioritizedGenes.jsx | 8 +++++++- ui/pages/Project/constants.js | 5 +++++ ui/shared/components/panel/sample.jsx | 2 +- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 95bd18ed22..f5113879a4 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -296,10 +296,14 @@ def _get_json_for_individuals(individuals, user=None, project_guid=None, family_ 'maternalId': F('mother__individual_id'), 'paternalId': F('father__individual_id'), 'displayName': INDIVIDUAL_DISPLAY_NAME_EXPR, - # 'phenotypePrioritizationTools': ArrayAgg( - # 'phenotypeprioritization__tool', - # filter=Q(phenotypeprioritization__tool__isnull=False) - # ) + 'phenotypePrioritizationTools': ArrayAgg( + JSONObject( + tool='phenotypeprioritization__tool', + createdDate='phenotypeprioritization__created_date', + ), + distinct=True, + filter=Q(phenotypeprioritization__tool__isnull=False) + ) } if add_sample_guids_field: additional_values.update({ diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index 07f1c21db2..f36daad16f 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -31,7 +31,7 @@ import { getSamplesByGuid, getMmeSubmissionsByGuid } from 'redux/selectors' import { HPO_FORM_FIELDS } from '../HpoTerms' import { CASE_REVIEW_STATUS_MORE_INFO_NEEDED, CASE_REVIEW_STATUS_OPTIONS, CASE_REVIEW_TABLE_NAME, INDIVIDUAL_DETAIL_FIELDS, - ONSET_AGE_OPTIONS, INHERITANCE_MODE_OPTIONS, INHERITANCE_MODE_LOOKUP, AR_FIELDS, + ONSET_AGE_OPTIONS, INHERITANCE_MODE_OPTIONS, INHERITANCE_MODE_LOOKUP, AR_FIELDS, PHENOTYPE_PRIORITIZATION_FIELDS, } from '../../constants' import { updateIndividuals } from '../../reducers' import { getCurrentProject, getParentOptionsByIndividual } from 
'../../selectors' @@ -179,7 +179,11 @@ const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission }) =>
)} - {SHOW_DATA_MODAL_CONFIG.filter(({ shouldShowField }) => individual[shouldShowField]).map( + {individual.phenotypePrioritizationTools.map( + tool =>
+ )} + {SHOW_DATA_MODAL_CONFIG.filter(({shouldShowField}) => individual[shouldShowField]).map( ({ modalName, title, modalSize, linkText, component }) => { const sample = loadedSamples.find(({ sampleType, isActive }) => isActive && sampleType === SAMPLE_TYPE_RNA) const titleIds = { sampleId: sample?.sampleId, individualId: individual.individualId } diff --git a/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx b/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx index d087a2185d..e97a2e38b0 100644 --- a/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx +++ b/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx @@ -9,6 +9,7 @@ import { BaseVariantGene } from 'shared/components/panel/variants/VariantGene' import { camelcaseToTitlecase } from 'shared/utils/stringUtils' import { loadPhenotypeGeneScores } from '../reducers' import { getPhenotypeDataLoading, getIndividualPhenotypeGeneScores } from '../selectors' +import {PHENOTYPE_PRIORITIZATION_FIELDS} from "../constants"; const PHENOTYPE_GENE_INFO_COLUMNS = [ { @@ -27,7 +28,12 @@ const PHENOTYPE_GENE_INFO_COLUMNS = [ /> ), }, - { name: 'tool', width: 1, content: 'Tool' }, + { + name: 'tool', + width: 1, + content: 'Tool', + format: ({ tool }) => PHENOTYPE_PRIORITIZATION_FIELDS[tool], + }, { name: 'diseaseName', width: 5, diff --git a/ui/pages/Project/constants.js b/ui/pages/Project/constants.js index f63465019d..1316295cca 100644 --- a/ui/pages/Project/constants.js +++ b/ui/pages/Project/constants.js @@ -504,3 +504,8 @@ export const TAG_FORM_FIELD = { parse: value => (value || []).map(name => ({ name })), validate: value => (value && value.length ? 
undefined : 'Required'), } + +export const PHENOTYPE_PRIORITIZATION_FIELDS = { + exomiser: 'Exomiser', + lirical: 'LIRICAL', +} diff --git a/ui/shared/components/panel/sample.jsx b/ui/shared/components/panel/sample.jsx index 9a98c00950..56bf048185 100644 --- a/ui/shared/components/panel/sample.jsx +++ b/ui/shared/components/panel/sample.jsx @@ -22,7 +22,7 @@ const Sample = React.memo(({ loadedSample, isOutdated, hoverDetails }) => ( {loadedSample && {loadedSample.sampleType}} - {loadedSample && loadedSample.datasetType !== DATASET_TYPE_SNV_INDEL_CALLS && ` - ${loadedSample.datasetType}`} + {loadedSample && loadedSample.datasetType && loadedSample.datasetType !== DATASET_TYPE_SNV_INDEL_CALLS && ` - ${loadedSample.datasetType}`} { !hoverDetails && (loadedSample ? ( From 71f1ace8c55337d37860f1d06e805a75427f9778 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 1 May 2024 16:45:08 -0400 Subject: [PATCH 059/736] share da code --- .../Project/components/CreateVariantButton.jsx | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ui/pages/Project/components/CreateVariantButton.jsx b/ui/pages/Project/components/CreateVariantButton.jsx index 707b064a36..31f164eefb 100644 --- a/ui/pages/Project/components/CreateVariantButton.jsx +++ b/ui/pages/Project/components/CreateVariantButton.jsx @@ -148,7 +148,13 @@ const validateHasTranscriptId = (value, allValues, props, name) => { return allValues[TRANSCRIPT_ID_FIELD_NAME] ? 
undefined : `Transcript ID is required to include ${name}` } -const formatField = field => ({ inline: true, width: 16, ...field }) +const formatField = (field) => { + let formattedField = { inline: true, width: 16, ...field } + if (field.validate && field.validate !== validateHasTranscriptId) { + formattedField = { ...formattedField, label: `${field.label}*` } + } + return formattedField +} const SNV_FIELDS = [ CHROM_FIELD, @@ -173,9 +179,7 @@ const SNV_FIELDS = [ format: value => (value || {}).numAlt, }, }, -].map(formatField).map(field => ( - field.validate && field.validate !== validateHasTranscriptId ? { ...field, label: `${field.label}*` } : field -)) +].map(formatField) const SV_FIELDS = [ CHROM_FIELD, @@ -206,7 +210,7 @@ const SV_FIELDS = [ max: 12, }, }, -].map(formatField).map(field => (field.validate ? { ...field, label: `${field.label}*` } : field)) +].map(formatField) const BaseCreateVariantButton = React.memo(({ variantType, family, user, ...props }) => ( user.isAnalyst ? ( From 1112274456223591e234ebe00e66f1ec321d7985 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 2 May 2024 12:08:50 -0400 Subject: [PATCH 060/736] Update ui/pages/Project/components/CreateVariantButton.jsx Co-authored-by: hanars --- ui/pages/Project/components/CreateVariantButton.jsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/pages/Project/components/CreateVariantButton.jsx b/ui/pages/Project/components/CreateVariantButton.jsx index 31f164eefb..3d94c55e11 100644 --- a/ui/pages/Project/components/CreateVariantButton.jsx +++ b/ui/pages/Project/components/CreateVariantButton.jsx @@ -149,9 +149,9 @@ const validateHasTranscriptId = (value, allValues, props, name) => { } const formatField = (field) => { - let formattedField = { inline: true, width: 16, ...field } + const formattedField = { inline: true, width: 16, ...field } if (field.validate && field.validate !== validateHasTranscriptId) { - formattedField = { ...formattedField, label: 
`${field.label}*` } + formattedField.label =`${field.label}*` } return formattedField } From d6df0d281fd4663e63b8e59b115cbe427b43ea4c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 May 2024 13:40:56 -0400 Subject: [PATCH 061/736] add comment --- seqr/views/apis/data_manager_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 43d9ca5607..9a7d9d4962 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -436,6 +436,7 @@ def get_loaded_projects(request, sample_type, dataset_type): project_samples = _fetch_airtable_loadable_project_samples(request.user) projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS + # Include projects with either the matched sample type OR with no loaded data projects = projects.exclude(family__individual__sample__sample_type=exclude_sample_type) else: projects = projects.filter(family__individual__sample__sample_type=sample_type) From 1e0c8643b767ca3f0158f464cc05768c8d70497e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 May 2024 15:38:24 -0400 Subject: [PATCH 062/736] accept drs file path --- seqr/views/apis/igv_api.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 58fc92c1f0..6d0ae529fe 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -147,12 +147,13 @@ def update_individual_igv_sample(request, individual_guid): if not file_path: raise ValueError('request must contain fields: filePath') - sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_path.endswith(suffix)), None) + file_name = _get_valid_file_name(file_path, request.user) + if not file_name: + raise Exception('Error accessing "{}"'.format(file_path)) + sample_type = next((st for 
suffix, st in SAMPLE_TYPE_MAP if file_name.endswith(suffix)), None) if not sample_type: raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format( - file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) - if not does_file_exist(file_path, user=request.user): - raise Exception('Error accessing "{}"'.format(file_path)) + file_name, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) sample, created = get_or_create_model_from_json( IgvSample, create_json={'individual': individual, 'sample_type': sample_type}, @@ -172,6 +173,21 @@ def update_individual_igv_sample(request, individual_guid): return create_json_response({'error': error}, status=400, reason=error) +def _get_valid_file_name(file_path, user): + if not file_path.startswith('drs://'): + return file_path if does_file_exist(file_path, user=user) else None + + drs_path = file_path.split('/') + response = requests.get( + f'https://{drs_path[-2]}/ga4gh/drs/v1/objects/{drs_path[-1]}', + headers=_get_gs_auth_api_headers(user), + ) + if response.status_code != 200: + return None + #access = next((a['access_url'] for a in drs_info['access_methods'] if a.get('type') == 'https'), None) + return response.json()['name'] + + @login_and_policies_required def fetch_igv_track(request, project_guid, igv_track_path): @@ -198,8 +214,12 @@ def _stream_gs(request, gs_path): content_type='application/octet-stream') +def _get_gs_auth_api_headers(user): + return {'Authorization': 'Bearer {}'.format(_get_access_token(user))} + + def _get_gs_rest_api_headers(range_header, gs_path, user=None): - headers = {'Authorization': 'Bearer {}'.format(_get_access_token(user))} + headers = _get_gs_auth_api_headers(user) if range_header: headers['Range'] = range_header google_project = get_google_project(gs_path) From 944cd05d234175797336cccfcc174e8dc41ea7d7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 May 2024 15:58:52 -0400 Subject: [PATCH 063/736] stream drs uri for igv --- 
seqr/views/apis/igv_api.py | 54 +++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 6d0ae529fe..262e354a9d 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -173,19 +173,26 @@ def update_individual_igv_sample(request, individual_guid): return create_json_response({'error': error}, status=400, reason=error) -def _get_valid_file_name(file_path, user): - if not file_path.startswith('drs://'): - return file_path if does_file_exist(file_path, user=user) else None +def _is_drs_uri_path(file_path): + return file_path.startswith('drs://') + +def _get_drs_info(file_path, user): drs_path = file_path.split('/') response = requests.get( f'https://{drs_path[-2]}/ga4gh/drs/v1/objects/{drs_path[-1]}', headers=_get_gs_auth_api_headers(user), ) - if response.status_code != 200: - return None - #access = next((a['access_url'] for a in drs_info['access_methods'] if a.get('type') == 'https'), None) - return response.json()['name'] + + return response.json() if response.status_code == 200 else None + + +def _get_valid_file_name(file_path, user): + if _is_drs_uri_path(file_path): + drs_info = _get_drs_info(file_path, user) + return None if drs_info is None else drs_info['name'] + + return file_path if does_file_exist(file_path, user=user) else None @login_and_policies_required @@ -199,16 +206,35 @@ def fetch_igv_track(request, project_guid, igv_track_path): if is_google_bucket_file_path(igv_track_path): return _stream_gs(request, igv_track_path) + if _is_drs_uri_path(igv_track_path): + return _stream_drs(request, igv_track_path) + return _stream_file(request, igv_track_path) def _stream_gs(request, gs_path): - headers = _get_gs_rest_api_headers(request.META.get('HTTP_RANGE'), gs_path, user=request.user) + return _stream_response( + url=f"{GS_STORAGE_URL}/{gs_path.replace('gs://', '', 1)}", + headers=_get_gs_rest_api_headers(gs_path, 
user=request.user), + request=request) - response = requests.get( - f"{GS_STORAGE_URL}/{gs_path.replace('gs://', '', 1)}", - headers=headers, - stream=True) + +def _stream_drs(request, file_path): + drs_info = _get_drs_info(file_path, request.user) + https_access = next(a['access_url'] for a in drs_info['access_methods'] if a['type'] == 'https') + + return _stream_response( + https_access['url'], + headers=dict([h.split(': ') for h in https_access['headers']]), + request=request) + + +def _stream_response(url, headers, request): + range_header = request.META.get('HTTP_RANGE') + if range_header: + headers['Range'] = range_header + + response = requests.get(url, headers=headers, stream=True) return StreamingHttpResponse(response.iter_content(chunk_size=65536), status=response.status_code, content_type='application/octet-stream') @@ -218,10 +244,8 @@ def _get_gs_auth_api_headers(user): return {'Authorization': 'Bearer {}'.format(_get_access_token(user))} -def _get_gs_rest_api_headers(range_header, gs_path, user=None): +def _get_gs_rest_api_headers(gs_path, user): headers = _get_gs_auth_api_headers(user) - if range_header: - headers['Range'] = range_header google_project = get_google_project(gs_path) if google_project: headers['x-goog-user-project'] = get_google_project(gs_path) From c615b272e7bfff4dbe6f1ac868c80179b17a6057 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 May 2024 16:32:40 -0400 Subject: [PATCH 064/736] actually store file type --- seqr/views/apis/igv_api.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 262e354a9d..41f9845a50 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -147,13 +147,14 @@ def update_individual_igv_sample(request, individual_guid): if not file_path: raise ValueError('request must contain fields: filePath') - file_name = _get_valid_file_name(file_path, request.user) - if not file_name: - 
raise Exception('Error accessing "{}"'.format(file_path)) - sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_name.endswith(suffix)), None) + original_file_path = file_path + file_path = _get_valid_file_path(file_path, request.user) + if not file_path: + raise Exception('Error accessing "{}"'.format(original_file_path)) + sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_path.endswith(suffix)), None) if not sample_type: raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format( - file_name, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) + file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) sample, created = get_or_create_model_from_json( IgvSample, create_json={'individual': individual, 'sample_type': sample_type}, @@ -180,17 +181,17 @@ def _is_drs_uri_path(file_path): def _get_drs_info(file_path, user): drs_path = file_path.split('/') response = requests.get( - f'https://{drs_path[-2]}/ga4gh/drs/v1/objects/{drs_path[-1]}', + f'https://{drs_path[2]}/ga4gh/drs/v1/objects/{drs_path[3]}', headers=_get_gs_auth_api_headers(user), ) return response.json() if response.status_code == 200 else None -def _get_valid_file_name(file_path, user): +def _get_valid_file_path(file_path, user): if _is_drs_uri_path(file_path): drs_info = _get_drs_info(file_path, user) - return None if drs_info is None else drs_info['name'] + return None if drs_info is None else f"{file_path}/{drs_info['name']}" return file_path if does_file_exist(file_path, user=user) else None @@ -221,12 +222,16 @@ def _stream_gs(request, gs_path): def _stream_drs(request, file_path): drs_info = _get_drs_info(file_path, request.user) + https_access = next(a['access_url'] for a in drs_info['access_methods'] if a['type'] == 'https') + url = https_access['url'] + headers = dict([h.split(': ') for h in https_access['headers']]) - return _stream_response( - https_access['url'], - headers=dict([h.split(': ') for h in https_access['headers']]), - 
request=request) + # TODO this does not actually result in a valid index file + if file_path.endswith('.cram.crai'): + url = url.replace('.cram', '.crai') + + return _stream_response(url, headers, request) def _stream_response(url, headers, request): From 0afdb1f0d18ebda3bb80f4b7750a2e9f698e8389 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 2 May 2024 16:49:33 -0400 Subject: [PATCH 065/736] changes per most review comments --- .../0064_alter_phenotypeprioritization.py | 37 +++++++++++++++++-- seqr/models.py | 8 ++-- seqr/utils/communication_utils.py | 7 +++- seqr/utils/search/add_data_utils.py | 21 +---------- seqr/views/apis/data_manager_api.py | 23 ++++++++++-- seqr/views/apis/family_api.py | 5 --- seqr/views/apis/family_api_tests.py | 3 +- seqr/views/apis/project_api.py | 8 ++-- .../components/FamilyTable/IndividualRow.jsx | 27 ++++++++++---- .../components/PhenotypePrioritizedGenes.jsx | 3 +- ui/shared/utils/constants.js | 3 +- 11 files changed, 90 insertions(+), 55 deletions(-) diff --git a/seqr/migrations/0064_alter_phenotypeprioritization.py b/seqr/migrations/0064_alter_phenotypeprioritization.py index 90c07ca116..0645d5a81f 100644 --- a/seqr/migrations/0064_alter_phenotypeprioritization.py +++ b/seqr/migrations/0064_alter_phenotypeprioritization.py @@ -1,10 +1,12 @@ -# Generated by Django 3.2.25 on 2024-04-29 15:57 - +# Generated by Django 3.2.25 on 2024-05-02 17:45 from django.conf import settings from django.db import migrations, models -import django.db.models.deletion import django.utils.timezone +from seqr.models import _slugify + +MAX_GUID_SIZE = 30 + class Migration(migrations.Migration): @@ -13,6 +15,17 @@ class Migration(migrations.Migration): ('seqr', '0063_dynamicanalysisgroup'), ] + def update_guids(apps, schema_editor): + PhenotypePrioritization = apps.get_model('seqr', 'PhenotypePrioritization') + db_alias = schema_editor.connection.alias + + pps = PhenotypePrioritization.objects.using(db_alias).all() + for pp in pps: + ids_as_str = 
"%s:%s:%s" % (pp.individual.individual_id, pp.gene_id, pp.disease_id) + pp.guid = 'PP%07d_%s' % (pp.id, _slugify(str(ids_as_str)))[:MAX_GUID_SIZE] + + PhenotypePrioritization.objects.using(db_alias).bulk_update(pps, ['guid']) + operations = [ migrations.AddField( model_name='phenotypeprioritization', @@ -24,4 +37,22 @@ class Migration(migrations.Migration): name='created_date', field=models.DateTimeField(db_index=True, default=django.utils.timezone.now), ), + migrations.AddField( + model_name='phenotypeprioritization', + name='guid', + field=models.CharField(default='', max_length=30), + preserve_default=False, + ), + migrations.AddField( + model_name='phenotypeprioritization', + name='last_modified_date', + field=models.DateTimeField(blank=True, db_index=True, null=True), + ), + migrations.RunPython(update_guids), + # Add uniqueness constraint to guid after default is replaced by update_guids + migrations.AlterField( + model_name='phenotypeprioritization', + name='guid', + field=models.CharField(db_index=True, unique=True, max_length=30), + ), ] diff --git a/seqr/models.py b/seqr/models.py index b919d29320..64636023a8 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1193,7 +1193,7 @@ class Meta: 'delta_intron_jaccard_index', 'mean_counts', 'total_counts', 'mean_total_counts'] -class PhenotypePrioritization(BulkOperationBase): +class PhenotypePrioritization(ModelWithGUID, BulkOperationBase): PARENT_FIELD = 'individual' individual = models.ForeignKey('Individual', on_delete=models.CASCADE, db_index=True) @@ -1205,11 +1205,11 @@ class PhenotypePrioritization(BulkOperationBase): disease_name = models.TextField() scores = models.JSONField() - created_date = models.DateTimeField(default=timezone.now, db_index=True) - created_by = models.ForeignKey(User, null=True, blank=True, related_name='+', on_delete=models.SET_NULL) - def __unicode__(self): return "%s:%s:%s" % (self.individual.individual_id, self.gene_id, self.disease_id) + def _compute_guid(self): + return 
'PP%07d_%s' % (self.id, _slugify(str(self))) + class Meta: json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'disease_name', 'scores', 'created_date', 'created_by'] diff --git a/seqr/utils/communication_utils.py b/seqr/utils/communication_utils.py index d00bac09b7..9baf6b6486 100644 --- a/seqr/utils/communication_utils.py +++ b/seqr/utils/communication_utils.py @@ -1,10 +1,13 @@ import logging from slacker import Slacker + from settings import SLACK_TOKEN, BASE_URL from django.core.mail import EmailMultiAlternatives from django.utils.html import strip_tags from notifications.signals import notify +BASE_EMAIL_TEMPLATE = 'Dear seqr user,\n\n{}\n\nAll the best,\nThe seqr team' + logger = logging.getLogger(__name__) @@ -52,11 +55,11 @@ def send_html_email(email_body, process_message=None, **kwargs): email_message.send() -def send_project_notification(project, notification, email_body, subject): +def send_project_notification(project, notification, email, subject): users = project.subscribers.user_set.all() notify.send(project, recipient=users, verb=notification) send_html_email( - email_body, + email_body=BASE_EMAIL_TEMPLATE.format(email), to=list(users.values_list('email', flat=True)), subject=subject, process_message=_set_bulk_notification_stream, diff --git a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py index 2b800815b6..ece4f55d48 100644 --- a/seqr/utils/search/add_data_utils.py +++ b/seqr/utils/search/add_data_utils.py @@ -9,9 +9,6 @@ SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL -BASE_EMAIL_TEMPLATE = 'Dear seqr user,\n\n{}\n\nAll the best,\nThe seqr team' - - def _hail_backend_error(*args, **kwargs): raise ValueError('Adding samples is disabled for the hail backend') @@ -90,22 +87,6 @@ def notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sa send_project_notification( project, notification=f'Loaded {sample_summary}', - email_body=BASE_EMAIL_TEMPLATE.format(email), + email=email, subject='New data available in 
seqr', ) - - -def notify_phenotype_prioritization_loaded(project, tool, num_samples, file_path, user): - url = f'{BASE_URL}project/{project.guid}/project_page' - project_link = f'{project.name}' - email = ( - f'This is to notify you that {tool.title()} data for {num_samples} samples ' - f'has been loaded in seqr project {project_link} by {user.get_full_name()}' - ) - - send_project_notification( - project, - notification=f'Loaded {tool.title()} data from {file_path} for {num_samples} samples', - email_body=BASE_EMAIL_TEMPLATE.format(email), - subject=f'New {tool.title()} data available in seqr', - ) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 81183b8f0e..ec6594bd84 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -14,7 +14,7 @@ from django.views.decorators.csrf import csrf_exempt from requests.exceptions import ConnectionError as RequestConnectionError -from seqr.utils.search.add_data_utils import notify_phenotype_prioritization_loaded +from seqr.utils.communication_utils import send_project_notification from seqr.utils.search.utils import get_search_backend_status, delete_search_backend_data from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.logging_utils import SeqrLogger @@ -30,7 +30,7 @@ from seqr.models import Sample, Individual, Project, PhenotypePrioritization -from settings import KIBANA_SERVER, KIBANA_ELASTICSEARCH_PASSWORD, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL +from settings import KIBANA_SERVER, KIBANA_ELASTICSEARCH_PASSWORD, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, BASE_URL logger = SeqrLogger(__name__) @@ -331,6 +331,21 @@ def load_rna_seq_sample_data(request, sample_guid): return create_json_response({'success': True}) +def _notify_phenotype_prioritization_loaded(project, tool, num_samples, user): + url = f'{BASE_URL}project/{project.guid}/project_page' + project_link = f'{project.name}' + email = ( + f'This is to notify you that 
{tool.title()} data for {num_samples} samples ' + f'has been loaded in seqr project {project_link} by {user.get_full_name()}' + ) + send_project_notification( + project, + notification=f'Loaded {num_samples} {tool.title()} samples', + email=email, + subject=f'New {tool.title()} data available in seqr', + ) + + @data_manager_required def load_phenotype_prioritization_data(request): request_json = json.loads(request.body) @@ -390,12 +405,12 @@ def load_phenotype_prioritization_data(request): PhenotypePrioritization.bulk_delete(request.user, to_delete) all_records = [record for indiv_records in all_records_by_project_name.values() for record in indiv_records] - PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data, created_by=user) for data in all_records]) + PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data, created_by=request.user) for data in all_records]) for project_name, indiv_records in all_records_by_project_name.items(): project = projects_by_name[project_name][0] num_samples = len(indiv_records) - notify_phenotype_prioritization_loaded(project, tool, num_samples, file_path, request.user) + _notify_phenotype_prioritization_loaded(project, tool, num_samples, request.user) return create_json_response({ 'info': info, diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 028791da7f..62e67e2eae 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -80,11 +80,6 @@ def family_page_data(request, family_guid): for individual_guid in outlier_individual_guids: response['individualsByGuid'][individual_guid]['hasRnaOutlierData'] = True - has_phentoype_score_indivs = PhenotypePrioritization.objects.filter(individual__family=family).values_list( - 'individual__guid', flat=True) - for individual_guid in has_phentoype_score_indivs: - response['individualsByGuid'][individual_guid]['hasPhenotypeGeneScores'] = True - submissions = 
get_json_for_matchmaker_submissions(MatchmakerSubmission.objects.filter(individual__family=family)) individual_mme_submission_guids = {s['individualGuid']: s['submissionGuid'] for s in submissions} for individual in response['individualsByGuid'].values(): diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 6d2ee42c95..ec218fb625 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -69,8 +69,7 @@ def test_family_page_data(self): self.assertEqual(len(response_json['individualsByGuid']), 3) individual = response_json['individualsByGuid'][INDIVIDUAL_GUID] - individual_fields = {'sampleGuids', 'igvSampleGuids', 'mmeSubmissionGuid', 'hasRnaOutlierData', - 'hasPhenotypeGeneScores'} + individual_fields = {'sampleGuids', 'igvSampleGuids', 'mmeSubmissionGuid', 'hasRnaOutlierData'} individual_fields.update(INDIVIDUAL_FIELDS) self.assertSetEqual(set(individual.keys()), individual_fields) self.assertListEqual( diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 40003061b1..6bb30121ae 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -187,6 +187,9 @@ def project_families(request, project_guid): metadata_individual_count=Count('individual', filter=Q( individual__features__0__isnull=False, individual__birth_year__isnull=False, individual__population__isnull=False, individual__proband_relationship__isnull=False, + )), + pp_individual_count=Count('individual', filter=Q( + individual__phenotypeprioritization__tool__isnull=False, )) ) family_annotations = dict( @@ -197,10 +200,7 @@ def project_families(request, project_guid): JSONObject(paternalGuid='individual__father__guid', maternalGuid='individual__mother__guid'), filter=Q(individual__mother__isnull=False) | Q(individual__father__isnull=False), distinct=True, ), - phenotypePrioritizationTools=ArrayAgg( - 'individual__phenotypeprioritization__tool', distinct=True, - 
filter=Q(individual__phenotypeprioritization__tool__isnull=False), - ), + hasPhenotypePrioritization=Case(When(pp_individual_count__gt=0, then=Value(True)), default=Value(False)), ) families = _get_json_for_families( family_models, request.user, has_case_review_perm=has_case_review_permissions(project, request.user), diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index f36daad16f..fa4b2fa430 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -120,7 +120,8 @@ CaseReviewStatus.propTypes = { const SHOW_DATA_MODAL_CONFIG = [ { - shouldShowField: 'hasPhenotypeGeneScores', + // TODO this doesn't work any more + shouldShowField: 'phenotypePrioritizationTools', component: PhenotypePrioritizedGenes, modalName: ({ individualId }) => `PHENOTYPE-PRIORITIZATION-${individualId}`, title: ({ individualId }) => `Phenotype Prioritized Genes: ${individualId}`, @@ -146,7 +147,7 @@ MmeStatusLabel.propTypes = { mmeSubmission: PropTypes.object, } -const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission }) => ( +const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission, phenotypePrioritizationTools }) => (
{loadedSamples.map( sample =>
, @@ -179,11 +180,10 @@ const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission }) =>
)} - {individual.phenotypePrioritizationTools.map( - tool =>
+ { phenotypePrioritizationTools.map( + pp => , )} - {SHOW_DATA_MODAL_CONFIG.filter(({shouldShowField}) => individual[shouldShowField]).map( + {SHOW_DATA_MODAL_CONFIG.filter(({ shouldShowField }) => individual[shouldShowField]).map( ({ modalName, title, modalSize, linkText, component }) => { const sample = loadedSamples.find(({ sampleType, isActive }) => isActive && sampleType === SAMPLE_TYPE_RNA) const titleIds = { sampleId: sample?.sampleId, individualId: individual.individualId } @@ -210,6 +210,7 @@ DataDetails.propTypes = { mmeSubmission: PropTypes.object, individual: PropTypes.object, loadedSamples: PropTypes.arrayOf(PropTypes.object), + phenotypePrioritizationTools: PropTypes.arrayOf(PropTypes.object), } const formatGene = gene => `${gene.gene} ${gene.comments ? ` (${gene.comments.trim()})` : ''}` @@ -548,6 +549,10 @@ class IndividualRow extends React.PureComponent { // only show active or first/ last inactive samples loadedSamples = loadedSamples.filter((sample, i) => sample.isActive || i === 0 || i === loadedSamples.length - 1) + const phenotypePrioritizationTools = individual.phenotypePrioritizationTools.map( + pp => ({ sampleType: PHENOTYPE_PRIORITIZATION_FIELDS[pp.tool], loadedDate: pp.createdDate }), + ) + const leftContent = (
@@ -577,8 +582,14 @@ class IndividualRow extends React.PureComponent { const editCaseReview = tableName === CASE_REVIEW_TABLE_NAME const rightContent = editCaseReview ? - : - + : ( + + ) return ( ( - // TODO might have to make a change here family.phenotypePrioritizationTools.length > 0, + // eslint-disable-next-line no-unused-vars + createFilter: (family, _user, _samplesByFamily) => family.hasPhenotypePrioritization, }, ], } From 24de48d9b00b6335003bc409b63f723d0c54e7f5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 May 2024 17:12:25 -0400 Subject: [PATCH 066/736] use provided index url --- .../0064_igvsample_index_file_path.py | 18 ++++++++++++++++ seqr/models.py | 3 ++- seqr/views/apis/igv_api.py | 21 ++++++++++++------- ui/shared/components/form/IGVUploadField.jsx | 2 +- .../components/panel/family/FamilyReads.jsx | 8 +++---- 5 files changed, 38 insertions(+), 14 deletions(-) create mode 100644 seqr/migrations/0064_igvsample_index_file_path.py diff --git a/seqr/migrations/0064_igvsample_index_file_path.py b/seqr/migrations/0064_igvsample_index_file_path.py new file mode 100644 index 0000000000..437cabb23f --- /dev/null +++ b/seqr/migrations/0064_igvsample_index_file_path.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.23 on 2024-05-02 20:48 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0063_dynamicanalysisgroup'), + ] + + operations = [ + migrations.AddField( + model_name='igvsample', + name='index_file_path', + field=models.TextField(blank=True, null=True), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index aa89945fd6..46765f6702 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -742,6 +742,7 @@ class IgvSample(ModelWithGUID): individual = models.ForeignKey('Individual', on_delete=models.PROTECT) sample_type = models.CharField(max_length=15, choices=SAMPLE_TYPE_CHOICES) file_path = models.TextField() + index_file_path = models.TextField(null=True, blank=True) sample_id 
= models.TextField(null=True) def __unicode__(self): @@ -753,7 +754,7 @@ def _compute_guid(self): class Meta: unique_together = ('individual', 'sample_type') - json_fields = ['guid', 'file_path', 'sample_type', 'sample_id'] + json_fields = ['guid', 'file_path', 'index_file_path', 'sample_type', 'sample_id'] class SavedVariant(ModelWithGUID): diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 41f9845a50..3c14fd9e3a 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -31,7 +31,14 @@ def _process_alignment_records(rows, num_id_cols=1, **kwargs): parsed_records = defaultdict(list) for row in rows: row_id = row[0] if num_id_cols == 1 else tuple(row[:num_id_cols]) - parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': row[num_cols] if len(row) > num_cols else None}) + sample_id = None + index_file_path = None + if len(row) > num_cols: + if _is_drs_uri_path(row[num_cols]): + index_file_path = row[num_cols] + else: + sample_id = row[num_cols] + parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': sample_id, 'indexFilePath': index_file_path}) return parsed_records @@ -158,7 +165,10 @@ def update_individual_igv_sample(request, individual_guid): sample, created = get_or_create_model_from_json( IgvSample, create_json={'individual': individual, 'sample_type': sample_type}, - update_json={'file_path': file_path, 'sample_id': request_json.get('sampleId')}, user=request.user) + update_json={ + 'file_path': file_path, + **{field: request_json.get(field) for field in ['sampleId', 'indexFilePath']} + }, user=request.user) response = { 'igvSamplesByGuid': { @@ -224,14 +234,9 @@ def _stream_drs(request, file_path): drs_info = _get_drs_info(file_path, request.user) https_access = next(a['access_url'] for a in drs_info['access_methods'] if a['type'] == 'https') - url = https_access['url'] headers = dict([h.split(': ') for h in https_access['headers']]) - # TODO this does not actually result in a valid 
index file - if file_path.endswith('.cram.crai'): - url = url.replace('.cram', '.crai') - - return _stream_response(url, headers, request) + return _stream_response(https_access['url'], headers, request) def _stream_response(url, headers, request): diff --git a/ui/shared/components/form/IGVUploadField.jsx b/ui/shared/components/form/IGVUploadField.jsx index dc2f412fa6..af244d2e96 100644 --- a/ui/shared/components/form/IGVUploadField.jsx +++ b/ui/shared/components/form/IGVUploadField.jsx @@ -39,7 +39,7 @@ IgvDropzoneLabel.propTypes = { const NO_PROJECT_COLUMNS = [ 'Individual ID', 'IGV Track File Path', - 'gCNV Sample ID, to identify the sample in the gCNV batch path. Not used for other track types', + 'For gCNV data: Sample ID, to identify the sample in the gCNV batch path. For other track types: Index File Path', ] // eslint-disable-next-line react-perf/jsx-no-new-array-as-prop diff --git a/ui/shared/components/panel/family/FamilyReads.jsx b/ui/shared/components/panel/family/FamilyReads.jsx index ddea5e7f07..bba09bd91f 100644 --- a/ui/shared/components/panel/family/FamilyReads.jsx +++ b/ui/shared/components/panel/family/FamilyReads.jsx @@ -30,6 +30,8 @@ const IGV = React.lazy(() => import('../../graph/IGV')) const MIN_LOCUS_RANGE_SIZE = 100 +const igvUrl = (sample, field = 'filePath') => `/api/project/${sample.projectGuid}/igv_track/${encodeURIComponent(sample[field])}` + const getTrackOptions = (type, sample, individual) => { const name = ReactDOMServer.renderToString( @@ -38,9 +40,7 @@ const getTrackOptions = (type, sample, individual) => { , ) - const url = `/api/project/${sample.projectGuid}/igv_track/${encodeURIComponent(sample.filePath)}` - - return { url, name, type, ...TRACK_OPTIONS[type] } + return { url: igvUrl(sample), name, type, ...TRACK_OPTIONS[type] } } const getSampleColor = individual => (individual.affected === AFFECTED ? 
'red' : 'blue') @@ -75,7 +75,7 @@ const getIgvTracks = (igvSampleIndividuals, sortedIndividuals, sampleTypes) => { if (sample.filePath.endsWith('.cram')) { Object.assign(track, { format: 'cram', - indexURL: `${track.url}.crai`, + indexURL: sample.indexFilePath ? igvUrl(sample, 'indexFilePath') : `${track.url}.crai`, }) } else { Object.assign(track, BAM_TRACK_OPTIONS) From 59a04ba3d0825ee656dc9719983993334c066a00 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 May 2024 17:13:26 -0400 Subject: [PATCH 067/736] add validation for index files --- seqr/views/apis/igv_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 3c14fd9e3a..31f84eae7c 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -162,6 +162,8 @@ def update_individual_igv_sample(request, individual_guid): if not sample_type: raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format( file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) + if _is_drs_uri_path(file_path) and not request_json.get('indexFilePath'): + raise Exception('Index File Path is required for DRS URIs') sample, created = get_or_create_model_from_json( IgvSample, create_json={'individual': individual, 'sample_type': sample_type}, From ffcd5405a04cac153c848d18abc29bc52c368386 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 10:08:42 -0400 Subject: [PATCH 068/736] fix lint error in createvariantbutton --- ui/pages/Project/components/CreateVariantButton.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/pages/Project/components/CreateVariantButton.jsx b/ui/pages/Project/components/CreateVariantButton.jsx index 3d94c55e11..197f2bc491 100644 --- a/ui/pages/Project/components/CreateVariantButton.jsx +++ b/ui/pages/Project/components/CreateVariantButton.jsx @@ -151,7 +151,7 @@ const validateHasTranscriptId = (value, allValues, props, name) => { const formatField = (field) => 
{ const formattedField = { inline: true, width: 16, ...field } if (field.validate && field.validate !== validateHasTranscriptId) { - formattedField.label =`${field.label}*` + formattedField.label = `${field.label}*` } return formattedField } From f6177c98b0b6ac9e00a36134e35216296768712c Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 11:01:15 -0400 Subject: [PATCH 069/736] shouldshow function --- ui/pages/Project/components/FamilyTable/IndividualRow.jsx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index fa4b2fa430..df6c8a8340 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -120,8 +120,7 @@ CaseReviewStatus.propTypes = { const SHOW_DATA_MODAL_CONFIG = [ { - // TODO this doesn't work any more - shouldShowField: 'phenotypePrioritizationTools', + shouldShow: individual => individual.phenotypePrioritizationTools.length > 0, component: PhenotypePrioritizedGenes, modalName: ({ individualId }) => `PHENOTYPE-PRIORITIZATION-${individualId}`, title: ({ individualId }) => `Phenotype Prioritized Genes: ${individualId}`, @@ -181,9 +180,9 @@ const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission, phen
)} { phenotypePrioritizationTools.map( - pp => , + pp =>
, )} - {SHOW_DATA_MODAL_CONFIG.filter(({ shouldShowField }) => individual[shouldShowField]).map( + {SHOW_DATA_MODAL_CONFIG.filter(({ shouldShow }) => shouldShow(individual)).map( ({ modalName, title, modalSize, linkText, component }) => { const sample = loadedSamples.find(({ sampleType, isActive }) => isActive && sampleType === SAMPLE_TYPE_RNA) const titleIds = { sampleId: sample?.sampleId, individualId: individual.individualId } From a45e251fad2807e387cab5a5491ba2f59f537cd7 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 13:20:07 -0400 Subject: [PATCH 070/736] backend tests --- seqr/fixtures/1kg_project.json | 16 ++++++- seqr/models.py | 2 +- seqr/views/apis/data_manager_api.py | 10 ++++- seqr/views/apis/data_manager_api_tests.py | 53 ++++++++++++----------- seqr/views/apis/project_api_tests.py | 4 +- seqr/views/utils/test_utils.py | 2 +- 6 files changed, 54 insertions(+), 33 deletions(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index ff9b601f52..7da1608c01 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -1041,7 +1041,7 @@ "created_date": "2017-02-05T06:42:55.397Z", "created_by": null, "last_modified_date": "2017-03-13T09:07:50.052Z", - + "sample_id": "NA20872", "sample_type": "WES", "is_active": false, @@ -1078,7 +1078,7 @@ "created_date": "2017-02-05T06:42:55.397Z", "created_by": null, "last_modified_date": "2017-03-13T09:07:50.111Z", - + "sample_id": "NA20875", "sample_type": "WES", "is_active": false, @@ -1553,6 +1553,9 @@ "model": "seqr.phenotypeprioritization", "pk": 1, "fields": { + "guid": "PP000001_NA19675_1ENSG00000268", + "created_date": "2024-05-02T06:42:55.397Z", + "created_by": null, "individual": 1, "gene_id": "ENSG00000268903", "tool": "exomiser", @@ -1570,6 +1573,9 @@ "model": "seqr.phenotypeprioritization", "pk": 2, "fields": { + "guid": "PP000002_NA19675_ENSG000002689", + "created_date": "2024-05-02T06:42:55.397Z", + "created_by": null, "individual": 1, 
"gene_id": "ENSG00000268903", "tool": "exomiser", @@ -1587,6 +1593,9 @@ "model": "seqr.phenotypeprioritization", "pk": 3, "fields": { + "guid": "PP000003_NA19678_ENSG000002689", + "created_date": "2024-05-02T06:42:55.397Z", + "created_by": null, "individual": 2, "gene_id": "ENSG00000268903", "tool": "lirical", @@ -1603,6 +1612,9 @@ "model": "seqr.phenotypeprioritization", "pk": 4, "fields": { + "guid": "PP000004_NA19675_ENSG000002689", + "created_date": "2024-05-02T06:42:55.397Z", + "created_by": null, "individual": 1, "gene_id": "ENSG00000268904", "tool": "lirical", diff --git a/seqr/models.py b/seqr/models.py index 64636023a8..933bf73e44 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1193,7 +1193,7 @@ class Meta: 'delta_intron_jaccard_index', 'mean_counts', 'total_counts', 'mean_total_counts'] -class PhenotypePrioritization(ModelWithGUID, BulkOperationBase): +class PhenotypePrioritization(ModelWithGUID): PARENT_FIELD = 'individual' individual = models.ForeignKey('Individual', on_delete=models.CASCADE, db_index=True) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index ec6594bd84..c95f707c08 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -7,6 +7,7 @@ import re import requests import urllib3 +import random from django.contrib.postgres.aggregates import ArrayAgg from django.db.models import Max, F, Q @@ -404,8 +405,13 @@ def load_phenotype_prioritization_data(request): if to_delete: PhenotypePrioritization.bulk_delete(request.user, to_delete) - all_records = [record for indiv_records in all_records_by_project_name.values() for record in indiv_records] - PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data, created_by=request.user) for data in all_records]) + models_to_create = [] + for indiv_records in all_records_by_project_name.values(): + for record in indiv_records: + model = PhenotypePrioritization(**record) + model.guid = 
f'PP{random.randint(10 ** 8, 10 ** 9)}_{model.individual.individual_id}_{model.gene_id}_{model.disease_id}'[:PhenotypePrioritization.MAX_GUID_SIZE] + models_to_create.append(model) + PhenotypePrioritization.bulk_create(request.user, models_to_create) for project_name, indiv_records in all_records_by_project_name.items(): project = projects_by_name[project_name][0] diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 307210a8b8..f3f07d4189 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -3,6 +3,7 @@ from django.urls.base import reverse import json import mock +from mock.mock import ANY from requests import HTTPError import responses @@ -362,28 +363,28 @@ EXPECTED_LIRICAL_DATA = [ {'diseaseId': 'OMIM:219801', 'geneId': 'ENSG00000268904', 'diseaseName': 'Cystinosis, no syndrome', - 'scores': {'compositeLR': 0.003, 'post_test_probability': 0.1}, - 'tool': 'lirical', 'rank': 11, 'individualGuid': 'I000001_na19675'}, # record from the fixture + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0.1}, 'createdBy': None, + 'tool': 'lirical', 'rank': 11, 'individualGuid': 'I000001_na19675', 'createdDate': ANY}, # record from the fixture {'diseaseId': 'OMIM:618460', 'geneId': 'ENSG00000105357', 'diseaseName': 'Khan-Khan-Katsanis syndrome', - 'scores': {'compositeLR': 0.066, 'postTestProbability': 0.0}, - 'tool': 'lirical', 'rank': 1, 'individualGuid': 'I000002_na19678'}, + 'scores': {'compositeLR': 0.066, 'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', + 'tool': 'lirical', 'rank': 1, 'individualGuid': 'I000002_na19678', 'createdDate': ANY}, {'diseaseId': 'OMIM:219800', 'geneId': 'ENSG00000105357', 'diseaseName': 'Cystinosis, nephropathic', - 'scores': {'postTestProbability': 0.0}, - 'tool': 'lirical', 'rank': 2, 'individualGuid': 'I000015_na20885'} + 'scores': {'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', + 'tool': 'lirical', 
'rank': 2, 'individualGuid': 'I000015_na20885', 'createdDate': ANY}, ] EXPECTED_UPDATED_LIRICAL_DATA = [ {'diseaseId': 'OMIM:219801', 'geneId': 'ENSG00000268904', 'diseaseName': 'Cystinosis, no syndrome', - 'scores': {'compositeLR': 0.003, 'post_test_probability': 0.1}, - 'tool': 'lirical', 'rank': 11, 'individualGuid': 'I000001_na19675'}, # record from the fixture + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0.1}, 'createdBy': None, + 'tool': 'lirical', 'rank': 11, 'individualGuid': 'I000001_na19675', 'createdDate': ANY}, # record from the fixture {'diseaseId': 'OMIM:219800', 'geneId': 'ENSG00000105357', 'diseaseName': 'Cystinosis, nephropathic', - 'scores': {'postTestProbability': 0.0}, - 'tool': 'lirical', 'rank': 2, 'individualGuid': 'I000015_na20885'}, + 'scores': {'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', + 'tool': 'lirical', 'rank': 2, 'individualGuid': 'I000015_na20885', 'createdDate': ANY}, {'diseaseId': 'OMIM:618460', 'geneId': 'ENSG00000105357', 'diseaseName': 'Khan-Khan-Katsanis syndrome', - 'scores': {'compositeLR': 0.066, 'postTestProbability': 0.0}, - 'tool': 'lirical', 'rank': 3, 'individualGuid': 'I000002_na19678'}, + 'scores': {'compositeLR': 0.066, 'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', + 'tool': 'lirical', 'rank': 3, 'individualGuid': 'I000002_na19678', 'createdDate': ANY}, {'diseaseId': 'OMIM:219800', 'geneId': 'ENSG00000105357', 'diseaseName': 'Cystinosis, nephropathic', - 'scores': {'compositeLR': 0.003, 'postTestProbability': 0.0}, - 'tool': 'lirical', 'rank': 4, 'individualGuid': 'I000002_na19678'}, + 'scores': {'compositeLR': 0.003, 'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', + 'tool': 'lirical', 'rank': 4, 'individualGuid': 'I000002_na19678', 'createdDate': ANY}, ] PEDIGREE_HEADER = ['Project_GUID', 'Family_GUID', 'Family_ID', 'Individual_ID', 'Paternal_ID', 'Maternal_ID', 'Sex'] @@ -1172,13 +1173,13 @@ def test_load_phenotype_prioritization_data(self, 
mock_subprocess): ] self.assertEqual(response.json()['info'], info) self._has_expected_file_loading_logs('gs://seqr_data/lirical_data.tsv.gz', user=self.data_manager_user, additional_logs=[ - ('delete PhenotypePrioritizations', {'dbUpdate': { - 'dbEntity': 'PhenotypePrioritization', 'numEntities': 1, 'updateType': 'bulk_delete', - 'parentEntityIds': ['I000002_na19678'], + ('delete 1 PhenotypePrioritizations', {'dbUpdate': { + 'dbEntity': 'PhenotypePrioritization', 'updateType': 'bulk_delete', + 'entityIds': ['PP000003_NA19678_ENSG000002689'], }}), - ('create PhenotypePrioritizations', {'dbUpdate': { - 'dbEntity': 'PhenotypePrioritization', 'numEntities': 2, 'updateType': 'bulk_create', - 'parentEntityIds': ['I000002_na19678', 'I000015_na20885'], + ('create 2 PhenotypePrioritizations', {'dbUpdate': { + 'dbEntity': 'PhenotypePrioritization', 'updateType': 'bulk_create', + "entityIds": [ANY, ANY], }}), ]) saved_data = _get_json_for_models(PhenotypePrioritization.objects.filter(tool='lirical').order_by('id'), @@ -1197,13 +1198,13 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess): ] self.assertEqual(response.json()['info'], info) self._has_expected_file_loading_logs('gs://seqr_data/lirical_data.tsv.gz', user=self.data_manager_user, additional_logs=[ - ('delete PhenotypePrioritizations', {'dbUpdate': { - 'dbEntity': 'PhenotypePrioritization', 'numEntities': 1, 'updateType': 'bulk_delete', - 'parentEntityIds': ['I000002_na19678'], + ('delete 1 PhenotypePrioritizations', {'dbUpdate': { + 'dbEntity': 'PhenotypePrioritization', 'updateType': 'bulk_delete', + 'entityIds': [ANY], }}), - ('create PhenotypePrioritizations', {'dbUpdate': { - 'dbEntity': 'PhenotypePrioritization', 'numEntities': 2, 'updateType': 'bulk_create', - 'parentEntityIds': ['I000002_na19678'], + ('create 2 PhenotypePrioritizations', {'dbUpdate': { + 'dbEntity': 'PhenotypePrioritization', 'updateType': 'bulk_create', + 'entityIds': [ANY, ANY], }}), ]) saved_data = 
_get_json_for_models(PhenotypePrioritization.objects.filter(tool='lirical'), diff --git a/seqr/views/apis/project_api_tests.py b/seqr/views/apis/project_api_tests.py index 16068f5bd8..375c241681 100644 --- a/seqr/views/apis/project_api_tests.py +++ b/seqr/views/apis/project_api_tests.py @@ -367,7 +367,7 @@ def test_project_families(self): family_3 = response_json['familiesByGuid']['F000003_3'] family_fields = { 'individualGuids', 'discoveryTags', 'caseReviewStatuses', 'caseReviewStatusLastModified', 'hasRequiredMetadata', - 'parents', + 'parents', 'hasPhenotypePrioritization', } family_fields.update(FAMILY_FIELDS) self.assertSetEqual(set(family_1.keys()), family_fields) @@ -382,6 +382,8 @@ def test_project_families(self): self.assertFalse(family_3['hasRequiredMetadata']) self.assertListEqual(family_1['parents'], [{'maternalGuid': 'I000003_na19679', 'paternalGuid': 'I000002_na19678'}]) self.assertListEqual(family_3['parents'], []) + self.assertEqual(family_1['hasPhenotypePrioritization'], True) + self.assertFalse(family_3['hasPhenotypePrioritization'], False) self.assertListEqual(family_3['discoveryTags'], []) self.assertSetEqual({tag['variantGuid'] for tag in family_1['discoveryTags']}, {'SV0000001_2103343353_r0390_100'}) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 9ce88a67a7..1f03d8e234 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -758,7 +758,7 @@ def _get_list_param(call, param): INDIVIDUAL_FIELDS = { 'projectGuid', 'familyGuid', 'paternalId', 'maternalId', 'paternalGuid', 'maternalGuid', - 'features', 'absentFeatures', 'nonstandardFeatures', 'absentNonstandardFeatures', + 'features', 'absentFeatures', 'nonstandardFeatures', 'absentNonstandardFeatures', 'phenotypePrioritizationTools', } INDIVIDUAL_FIELDS.update(INDIVIDUAL_CORE_FIELDS) From f687a72ac1b3ba8371a47003135cef2d6a0f5dd1 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 14:28:49 -0400 Subject: [PATCH 071/736] 
notification tests --- seqr/fixtures/1kg_project.json | 2 -- seqr/views/apis/data_manager_api_tests.py | 35 +++++++++++++++++++++-- seqr/views/apis/dataset_api_tests.py | 2 +- seqr/views/apis/family_api_tests.py | 17 +++++------ 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 7da1608c01..0b959bca28 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -1041,7 +1041,6 @@ "created_date": "2017-02-05T06:42:55.397Z", "created_by": null, "last_modified_date": "2017-03-13T09:07:50.052Z", - "sample_id": "NA20872", "sample_type": "WES", "is_active": false, @@ -1078,7 +1077,6 @@ "created_date": "2017-02-05T06:42:55.397Z", "created_by": null, "last_modified_date": "2017-03-13T09:07:50.111Z", - "sample_id": "NA20875", "sample_type": "WES", "is_active": false, diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index f3f07d4189..4203b2d538 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -16,7 +16,7 @@ from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier, Sample, Project, PhenotypePrioritization from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL - +SEQR_URL = 'https://seqr.broadinstitute.org/' PROJECT_GUID = 'R0001_1kg' ES_CAT_ALLOCATION=[{ @@ -1109,8 +1109,10 @@ def test_load_rna_seq_sample_data(self, mock_open, mock_os): def _join_data(cls, data): return ['\t'.join(line).encode('utf-8') for line in data] + @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', SEQR_URL) + @mock.patch('seqr.utils.communication_utils.send_html_email') @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_load_phenotype_prioritization_data(self, mock_subprocess): + def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_email): url = reverse(load_phenotype_prioritization_data) 
self.check_data_manager_login(url) @@ -1186,9 +1188,15 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess): nested_fields=[{'fields': ('individual', 'guid'), 'key': 'individualGuid'}]) self.assertListEqual(saved_data, EXPECTED_LIRICAL_DATA) mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) + self._assert_expected_notifications(mock_send_email, [ + {'tool': 'lirical', 'num_samples': 1, 'user': self.data_manager_user}, + {'tool': 'lirical', 'num_samples': 1, 'user': self.data_manager_user, + 'project_guid': 'R0003_test', 'project_name': 'Test Reprocessed Project'} + ]) # Test uploading new data self.reset_logs() + mock_send_email.reset_mock() mock_subprocess.return_value.stdout = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + UPDATE_LIRICAL_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps(request_body)) self.assertEqual(response.status_code, 200) @@ -1210,6 +1218,29 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess): saved_data = _get_json_for_models(PhenotypePrioritization.objects.filter(tool='lirical'), nested_fields=[{'fields': ('individual', 'guid'), 'key': 'individualGuid'}]) self.assertListEqual(saved_data, EXPECTED_UPDATED_LIRICAL_DATA) + self._assert_expected_notifications(mock_send_email, [ + {'tool': 'lirical', 'num_samples': 2, 'user': self.data_manager_user}, + ]) + + @staticmethod + def _assert_expected_notifications(mock_send_email, notification_info: list[dict]): + calls = [] + for notif_dict in notification_info: + project_guid = notif_dict.get('project_guid', PROJECT_GUID) + project_name = notif_dict.get('project_name', '1kg project nåme with uniçøde') + url = f'{SEQR_URL}project/{project_guid}/project_page' + project_link = f'{project_name}' + email = ( + f'This is to notify you that {notif_dict["tool"].title()} data for {notif_dict["num_samples"]} samples ' + f'has been loaded in seqr 
project {project_link} by {notif_dict["user"].get_full_name()}' + ) + calls.append(mock.call( + email_body=f'Dear seqr user,\n\n{email}\n\nAll the best,\nThe seqr team', + subject=f'New {notif_dict["tool"].title()} data available in seqr', + to=['test_user_manager@test.com'], process_message=ANY, + )) + + mock_send_email.assert_has_calls(calls) @staticmethod def _ls_subprocess_calls(file, is_error=True): diff --git a/seqr/views/apis/dataset_api_tests.py b/seqr/views/apis/dataset_api_tests.py index 3b26ffc9c2..dafbe371e3 100644 --- a/seqr/views/apis/dataset_api_tests.py +++ b/seqr/views/apis/dataset_api_tests.py @@ -304,7 +304,7 @@ def _assert_expected_notification(self, mock_send_email, mock_send_slack, sample if not email_content: email_content = f'This is to notify you that {count} new {sample_type} samples have been loaded in seqr project {project_name}' mock_send_email.assert_called_once_with( - f'Dear seqr user,\n\n{email_content}\n\nAll the best,\nThe seqr team', + email_body=f'Dear seqr user,\n\n{email_content}\n\nAll the best,\nThe seqr team', subject='New data available in seqr', to=[recipient], process_message=mock.ANY, ) slack_message = f'{count} new {sample_type} samples are loaded in {SEQR_URL}/project/{project_guid}/project_page' diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index ec218fb625..439a59f20e 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -72,10 +72,6 @@ def test_family_page_data(self): individual_fields = {'sampleGuids', 'igvSampleGuids', 'mmeSubmissionGuid', 'hasRnaOutlierData'} individual_fields.update(INDIVIDUAL_FIELDS) self.assertSetEqual(set(individual.keys()), individual_fields) - self.assertListEqual( - [True, True, False], - [response_json['individualsByGuid'][guid].get('hasPhenotypeGeneScores', False) for guid in INDIVIDUAL_GUIDS] - ) self.assertListEqual( [True, False, True], [response_json['individualsByGuid'][guid].get('hasRnaOutlierData', 
False) for guid in INDIVIDUAL_GUIDS] @@ -577,10 +573,14 @@ def test_get_family_phenotype_gene_scores(self): 'exomiser': [ {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 2, 'scores': {'exomiser_score': 0.969347946, 'phenotype_score': 0.443567539, - 'variant_score': 0.999200702}}, + 'variant_score': 0.999200702}, + 'createdDate': '2024-05-02T06:42:55.397Z', 'createdBy': None + }, {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'rank': 1, 'scores': {'exomiser_score': 0.977923765, 'phenotype_score': 0.603998205, - 'variant_score': 1}} + 'variant_score': 1}, + 'createdDate': '2024-05-02T06:42:55.397Z', 'createdBy': None + }, ] } }, @@ -588,8 +588,9 @@ def test_get_family_phenotype_gene_scores(self): 'ENSG00000268903': { 'lirical': [ {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 1, - 'scores': {'compositeLR': 0.003, 'post_test_probability': 0} - } + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}, + 'createdDate': '2024-05-02T06:42:55.397Z', 'createdBy': None, + } ] } } From 40cbaf2d09f745caed928fb18c2e7b148ada20f0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 3 May 2024 14:52:18 -0400 Subject: [PATCH 072/736] codacy medium fixes --- hail_search/__main__.py | 1 - hail_search/queries/base.py | 7 +++--- hail_search/queries/ont_snv_indel.py | 2 +- matchmaker/views/external_api_tests.py | 2 +- matchmaker/views/matchmaker_api.py | 2 +- matchmaker/views/matchmaker_api_tests.py | 2 +- panelapp/pa_locus_list_api_tests.py | 4 +--- panelapp/panelapp_utils.py | 2 +- .../management/commands/update_omim.py | 2 +- .../commands/utils/download_utils.py | 4 ++-- .../commands/reload_saved_variant_json.py | 1 - seqr/utils/file_utils_tests.py | 8 +++---- seqr/utils/search/hail_search_utils_tests.py | 4 ++-- seqr/views/apis/anvil_workspace_api_tests.py | 24 +++++++++---------- seqr/views/apis/data_manager_api_tests.py | 12 +++++----- seqr/views/apis/igv_api.py | 7 +++--- 
seqr/views/apis/igv_api_tests.py | 8 +++---- seqr/views/apis/project_api_tests.py | 4 ++-- seqr/views/apis/report_api.py | 2 +- seqr/views/apis/report_api_tests.py | 5 ++-- seqr/views/utils/airflow_utils.py | 1 - seqr/views/utils/anvil_metadata_utils.py | 3 +-- seqr/views/utils/terra_api_utils_tests.py | 2 +- seqr/views/utils/test_utils.py | 6 ++--- settings.py | 1 - 25 files changed, 54 insertions(+), 62 deletions(-) diff --git a/hail_search/__main__.py b/hail_search/__main__.py index 19dc916fba..bef783c48a 100644 --- a/hail_search/__main__.py +++ b/hail_search/__main__.py @@ -1,5 +1,4 @@ from aiohttp import web -import hail as hl import logging from hail_search.web_app import init_web_app diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 2b16458f15..0b7dffc171 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -4,11 +4,10 @@ import logging import os -from hail_search.constants import AFFECTED, AFFECTED_ID, ALT_ALT, ANNOTATION_OVERRIDE_FIELDS, ANY_AFFECTED, COMP_HET_ALT, \ +from hail_search.constants import AFFECTED_ID, ALT_ALT, ANNOTATION_OVERRIDE_FIELDS, ANY_AFFECTED, COMP_HET_ALT, \ COMPOUND_HET, GENOME_VERSION_GRCh38, GROUPED_VARIANTS_FIELD, ALLOWED_TRANSCRIPTS, ALLOWED_SECONDARY_TRANSCRIPTS, HAS_ANNOTATION_OVERRIDE, \ - HAS_ALT, HAS_REF,INHERITANCE_FILTERS, PATH_FREQ_OVERRIDE_CUTOFF, MALE, RECESSIVE, REF_ALT, REF_REF, UNAFFECTED, \ - UNAFFECTED_ID, X_LINKED_RECESSIVE, XPOS, OMIM_SORT, UNKNOWN_AFFECTED, UNKNOWN_AFFECTED_ID, FAMILY_GUID_FIELD, GENOTYPES_FIELD, \ - AFFECTED_ID_MAP + HAS_ALT, HAS_REF,INHERITANCE_FILTERS, PATH_FREQ_OVERRIDE_CUTOFF, MALE, RECESSIVE, REF_ALT, REF_REF, \ + UNAFFECTED_ID, X_LINKED_RECESSIVE, XPOS, OMIM_SORT, FAMILY_GUID_FIELD, GENOTYPES_FIELD, AFFECTED_ID_MAP DATASETS_DIR = os.environ.get('DATASETS_DIR', '/hail_datasets') SSD_DATASETS_DIR = os.environ.get('SSD_DATASETS_DIR', DATASETS_DIR) diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py index 
36f28f425c..dc99ad8e18 100644 --- a/hail_search/queries/ont_snv_indel.py +++ b/hail_search/queries/ont_snv_indel.py @@ -1,6 +1,6 @@ from aiohttp.web import HTTPBadRequest -from hail_search.queries.base import BaseHailTableQuery, PredictionPath +from hail_search.queries.base import BaseHailTableQuery from hail_search.queries.snv_indel import SnvIndelHailTableQuery diff --git a/matchmaker/views/external_api_tests.py b/matchmaker/views/external_api_tests.py index 491cb4458f..c1cf7c198f 100644 --- a/matchmaker/views/external_api_tests.py +++ b/matchmaker/views/external_api_tests.py @@ -6,7 +6,7 @@ from matchmaker.models import MatchmakerIncomingQuery -TEST_ACCESS_TOKEN = 'erjhtg3558324u82' +TEST_ACCESS_TOKEN = 'erjhtg3558324u82' # nosec TEST_MME_NODES = {TEST_ACCESS_TOKEN: {'name': 'Test Node'}} diff --git a/matchmaker/views/matchmaker_api.py b/matchmaker/views/matchmaker_api.py index 10f276da3b..526c199e8b 100644 --- a/matchmaker/views/matchmaker_api.py +++ b/matchmaker/views/matchmaker_api.py @@ -187,7 +187,7 @@ def _search_external_matches(node, patient_data, user): 'Content-Language': 'en-US', } try: - external_result = requests.post(url=node['url'], headers=headers, data=json.dumps(body)) + external_result = requests.post(url=node['url'], headers=headers, data=json.dumps(body), timeout=300) if external_result.status_code != 200: try: message = external_result.json().get('message') diff --git a/matchmaker/views/matchmaker_api_tests.py b/matchmaker/views/matchmaker_api_tests.py index 149d0cde77..eb60b73893 100644 --- a/matchmaker/views/matchmaker_api_tests.py +++ b/matchmaker/views/matchmaker_api_tests.py @@ -153,7 +153,7 @@ MISMATCHED_GENE_NEW_MATCH_JSON['patient']['genomicFeatures'][0]['gene']['id'] = 'ENSG00000227232' MISMATCHED_GENE_NEW_MATCH_JSON['patient']['id'] = '987' -MOCK_SLACK_TOKEN = 'xoxp-123' +MOCK_SLACK_TOKEN = 'xoxp-123' # nosec MOCK_NODES_BY_NAME = { 'Node A': {'name': 'Node A', 'token': 'abc', 'url': 'http://node_a.com/match'}, diff --git 
a/panelapp/pa_locus_list_api_tests.py b/panelapp/pa_locus_list_api_tests.py index 8b46eddbed..9c1acd2555 100644 --- a/panelapp/pa_locus_list_api_tests.py +++ b/panelapp/pa_locus_list_api_tests.py @@ -4,9 +4,7 @@ from django.core.management import call_command, CommandError from django.urls.base import reverse -from seqr.models import LocusList -from seqr.views.apis.locus_list_api import locus_lists, locus_list_info, add_project_locus_lists, \ - delete_project_locus_lists +from seqr.views.apis.locus_list_api import locus_lists, locus_list_info from seqr.views.apis.locus_list_api_tests import BaseLocusListAPITest from seqr.views.utils.test_utils import AuthenticationTestCase, LOCUS_LIST_FIELDS diff --git a/panelapp/panelapp_utils.py b/panelapp/panelapp_utils.py index 3ec5ca8504..d9540e777e 100644 --- a/panelapp/panelapp_utils.py +++ b/panelapp/panelapp_utils.py @@ -99,7 +99,7 @@ def _create_pa_locus_list_gene(seqr_locus_list_gene, panel_gene_json): def _get_all_panels(panels_url, all_results): - resp = requests.get(panels_url) + resp = requests.get(panels_url, timeout=REQUEST_TIMEOUT_S) resp_json = resp.json() curr_page_results = [r for r in resp_json.get('results', []) if r.get('stats', {}).get('number_of_genes', 0) > 0] all_results += curr_page_results diff --git a/reference_data/management/commands/update_omim.py b/reference_data/management/commands/update_omim.py index b8adbe47c2..ffb46e1bc3 100644 --- a/reference_data/management/commands/update_omim.py +++ b/reference_data/management/commands/update_omim.py @@ -146,7 +146,7 @@ def _cache_records(models): command = 'gsutil mv {filename} gs://{bucket}'.format(filename=CACHED_RECORDS_FILENAME, bucket=CACHED_RECORDS_BUCKET) logger.info(command) - os.system(command) + os.system(command) # nosec class Command(GeneCommand): diff --git a/reference_data/management/commands/utils/download_utils.py b/reference_data/management/commands/utils/download_utils.py index 18e3b8c073..7a4bfa3ad1 100644 --- 
a/reference_data/management/commands/utils/download_utils.py +++ b/reference_data/management/commands/utils/download_utils.py @@ -24,7 +24,7 @@ def download_file(url, to_dir=tempfile.gettempdir(), verbose=True): return local_file_path is_gz = url.endswith(".gz") - response = requests.get(url, stream=is_gz) + response = requests.get(url, stream=is_gz, timeout=300) input_iter = response if is_gz else response.iter_content() if verbose: logger.info("Downloading {} to {}".format(url, local_file_path)) @@ -40,7 +40,7 @@ def download_file(url, to_dir=tempfile.gettempdir(), verbose=True): def _get_remote_file_size(url): if url.startswith("http"): - response = requests.head(url) + response = requests.head(url, timeout=10) return int(response.headers.get('Content-Length', '0')) else: return 0 # file size not yet implemented for FTP and other protocols diff --git a/seqr/management/commands/reload_saved_variant_json.py b/seqr/management/commands/reload_saved_variant_json.py index ccb8ff82d3..2e83305eaa 100644 --- a/seqr/management/commands/reload_saved_variant_json.py +++ b/seqr/management/commands/reload_saved_variant_json.py @@ -1,7 +1,6 @@ import logging from django.core.management.base import BaseCommand from django.db.models.query_utils import Q -from tqdm import tqdm from seqr.models import Project from seqr.views.utils.variant_utils import update_projects_saved_variant_json diff --git a/seqr/utils/file_utils_tests.py b/seqr/utils/file_utils_tests.py index d4d7e9028e..32a7bbcb91 100644 --- a/seqr/utils/file_utils_tests.py +++ b/seqr/utils/file_utils_tests.py @@ -19,7 +19,7 @@ def test_mv_file_to_gs(self, mock_logger, mock_subproc): with self.assertRaises(Exception) as ee: mv_file_to_gs('/temp_path', 'gs://bucket/target_path', user=None) self.assertEqual(str(ee.exception), 'Run command failed: -bash: gsutil: command not found. 
Please check the path.') - mock_subproc.Popen.assert_called_with('gsutil mv /temp_path gs://bucket/target_path', stdout=mock_subproc.PIPE, stderr=mock_subproc.STDOUT, shell=True) + mock_subproc.Popen.assert_called_with('gsutil mv /temp_path gs://bucket/target_path', stdout=mock_subproc.PIPE, stderr=mock_subproc.STDOUT, shell=True) # nosec mock_logger.info.assert_called_with('==> gsutil mv /temp_path gs://bucket/target_path', None) process.wait.assert_called_with() @@ -27,7 +27,7 @@ def test_mv_file_to_gs(self, mock_logger, mock_subproc): mock_logger.reset_mock() process.wait.return_value = 0 mv_file_to_gs('/temp_path', 'gs://bucket/target_path', user=None) - mock_subproc.Popen.assert_called_with('gsutil mv /temp_path gs://bucket/target_path', stdout=mock_subproc.PIPE, stderr=mock_subproc.STDOUT, shell=True) + mock_subproc.Popen.assert_called_with('gsutil mv /temp_path gs://bucket/target_path', stdout=mock_subproc.PIPE, stderr=mock_subproc.STDOUT, shell=True) # nosec mock_logger.info.assert_called_with('==> gsutil mv /temp_path gs://bucket/target_path', None) process.wait.assert_called_with() @@ -44,7 +44,7 @@ def test_get_gs_file_list(self, mock_logger, mock_subproc): get_gs_file_list('gs://bucket/target_path/', user=None) self.assertEqual(str(ee.exception), 'Run command failed: -bash: gsutil: command not found. 
Please check the path.') mock_subproc.Popen.assert_called_with('gsutil ls gs://bucket/target_path', stdout=mock_subproc.PIPE, - stderr=mock_subproc.PIPE, shell=True) + stderr=mock_subproc.PIPE, shell=True) # nosec mock_logger.info.assert_called_with('==> gsutil ls gs://bucket/target_path', None) process.communicate.assert_called_with() @@ -55,7 +55,7 @@ def test_get_gs_file_list(self, mock_logger, mock_subproc): b'gs://bucket/target_path/data.vcf.gz\n', b'' file_list = get_gs_file_list('gs://bucket/target_path', user=None) mock_subproc.Popen.assert_called_with('gsutil ls gs://bucket/target_path/**', stdout=mock_subproc.PIPE, - stderr=mock_subproc.PIPE, shell=True) + stderr=mock_subproc.PIPE, shell=True) # nosec mock_logger.info.assert_called_with('==> gsutil ls gs://bucket/target_path/**', None) process.communicate.assert_called_with() self.assertEqual(file_list, ['gs://bucket/target_path/id_file.txt', 'gs://bucket/target_path/data.vcf.gz']) diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index b7eaa6994f..8f0cb092cb 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -10,10 +10,10 @@ get_variants_for_variant_ids, variant_lookup, sv_variant_lookup, InvalidSearchException from seqr.utils.search.search_utils_tests import SearchTestHelper from hail_search.test_utils import get_hail_search_body, EXPECTED_SAMPLE_DATA, FAMILY_1_SAMPLE_DATA, \ - FAMILY_2_ALL_SAMPLE_DATA, ALL_AFFECTED_SAMPLE_DATA, CUSTOM_AFFECTED_SAMPLE_DATA, HAIL_BACKEND_VARIANTS, \ + ALL_AFFECTED_SAMPLE_DATA, CUSTOM_AFFECTED_SAMPLE_DATA, HAIL_BACKEND_VARIANTS, \ LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS, FAMILY_2_VARIANT_SAMPLE_DATA, \ FAMILY_2_MITO_SAMPLE_DATA, EXPECTED_SAMPLE_DATA_WITH_SEX, VARIANT_LOOKUP_VARIANT, MULTI_PROJECT_SAMPLE_DATA, \ - GCNV_VARIANT4, SV_VARIANT2, SV_VARIANT4 + GCNV_VARIANT4, SV_VARIANT2 MOCK_HOST = 
'http://test-hail-host' SV_WGS_SAMPLE_DATA = [{ diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index 91e13aa0f3..f24cc485d1 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -300,7 +300,7 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_ response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY_GZ_DATA_PATH)) self.assertEqual(response.status_code, 400) self.assertListEqual(response.json()['errors'], ['Data file or path /test_path.vcf.gz is not found.']) - mock_subprocess.assert_called_with('gsutil ls gs://test_bucket/test_path.vcf.gz', stdout=-1, stderr=-2, shell=True) + mock_subprocess.assert_called_with('gsutil ls gs://test_bucket/test_path.vcf.gz', stdout=-1, stderr=-2, shell=True) # nosec mock_file_logger.info.assert_has_calls([ mock.call('==> gsutil ls gs://test_bucket/test_path.vcf.gz', self.manager_user), mock.call('File not found', self.manager_user), @@ -312,7 +312,7 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_ response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY_SHARDED_DATA_PATH)) self.assertEqual(response.status_code, 400) self.assertListEqual(response.json()['errors'], ['Data file or path /test_path-*.vcf.gz is not found.']) - mock_subprocess.assert_called_with('gsutil ls gs://test_bucket/test_path-*.vcf.gz', stdout=-1, stderr=-1, shell=True) + mock_subprocess.assert_called_with('gsutil ls gs://test_bucket/test_path-*.vcf.gz', stdout=-1, stderr=-1, shell=True) # nosec mock_file_logger.info.assert_has_calls([ mock.call('==> gsutil ls gs://test_bucket/test_path-*.vcf.gz', self.manager_user), mock.call('File not found', self.manager_user), @@ -324,7 +324,7 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_ response = self.client.post(url, 
content_type='application/json', data=json.dumps(REQUEST_BODY_SHARDED_DATA_PATH)) self.assertEqual(response.status_code, 400) self.assertListEqual(response.json()['errors'], ['Data file or path /test_path-*.vcf.gz is not found.']) - mock_subprocess.assert_called_with('gsutil ls gs://test_bucket/test_path-*.vcf.gz', stdout=-1, stderr=-1, shell=True) + mock_subprocess.assert_called_with('gsutil ls gs://test_bucket/test_path-*.vcf.gz', stdout=-1, stderr=-1, shell=True) # nosec mock_file_logger.info.assert_has_calls([ mock.call('==> gsutil ls gs://test_bucket/test_path-*.vcf.gz', self.manager_user), ]) @@ -342,10 +342,10 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_ self.assertEqual(response.status_code, 400) self.assertListEqual(response.json()['errors'], ['No header found in the VCF file.']) mock_subprocess.assert_has_calls([ - mock.call('gsutil ls gs://test_bucket/test_path.vcf.gz', stdout=-1, stderr=-2, shell=True), + mock.call('gsutil ls gs://test_bucket/test_path.vcf.gz', stdout=-1, stderr=-2, shell=True), # nosec mock.call().wait(), mock.call('gsutil cat -r 0-65536 gs://test_bucket/test_path.vcf.gz | gunzip -c -q - ', - stdout=-1, stderr=-2, shell=True), + stdout=-1, stderr=-2, shell=True), # nosec ]) mock_file_logger.info.assert_has_calls([ mock.call('==> gsutil ls gs://test_bucket/test_path.vcf.gz', self.manager_user), @@ -384,9 +384,9 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_ self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), VALIDATE_VFC_RESPONSE) mock_subprocess.assert_has_calls([ - mock.call('gsutil ls gs://test_bucket/test_path.vcf', stdout=-1, stderr=-2, shell=True), + mock.call('gsutil ls gs://test_bucket/test_path.vcf', stdout=-1, stderr=-2, shell=True), # nosec mock.call().wait(), - mock.call('gsutil cat gs://test_bucket/test_path.vcf', stdout=-1, stderr=-2, shell=True), + mock.call('gsutil cat gs://test_bucket/test_path.vcf', stdout=-1, 
stderr=-2, shell=True), # nosec ]) mock_file_logger.info.assert_has_calls([ mock.call('==> gsutil ls gs://test_bucket/test_path.vcf', self.manager_user), @@ -404,8 +404,8 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_ self.assertEqual(response.status_code, 200) self.assertEqual(response.json(), {'fullDataPath': 'gs://test_bucket/test_path-*.vcf.gz', 'vcfSamples': ['HG00735', 'NA19675_1', 'NA19678']}) mock_subprocess.assert_has_calls([ - mock.call('gsutil ls gs://test_bucket/test_path-*.vcf.gz', stdout=-1, stderr=-1, shell=True), - mock.call('gsutil cat -r 0-65536 gs://test_bucket/test_path-001.vcf.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True), + mock.call('gsutil ls gs://test_bucket/test_path-*.vcf.gz', stdout=-1, stderr=-1, shell=True), # nosec + mock.call('gsutil cat -r 0-65536 gs://test_bucket/test_path-001.vcf.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True), # nosec ]) mock_file_logger.info.assert_has_calls([ mock.call('==> gsutil ls gs://test_bucket/test_path-*.vcf.gz', self.manager_user), @@ -435,7 +435,7 @@ def test_get_anvil_vcf_list(self, mock_subprocess, mock_file_logger, mock_utils_ response = self.client.get(url, content_type='application/json') self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), {'dataPathList': []}) - mock_subprocess.assert_called_with('gsutil ls gs://test_bucket', stdout=-1, stderr=-1, shell=True) + mock_subprocess.assert_called_with('gsutil ls gs://test_bucket', stdout=-1, stderr=-1, shell=True) # nosec mock_file_logger.info.assert_called_with('==> gsutil ls gs://test_bucket', self.manager_user) # Test a valid operation @@ -456,9 +456,9 @@ def test_get_anvil_vcf_list(self, mock_subprocess, mock_file_logger, mock_utils_ self.assertDictEqual(response.json(), {'dataPathList': ['/test.vcf', '/data/test.vcf.gz', '/data/test-101.vcf.gz', '/data/test-102.vcf.gz', '/sharded/test-*.vcf.gz']}) mock_subprocess.assert_has_calls([ - mock.call('gsutil ls 
gs://test_bucket', stdout=-1, stderr=-1, shell=True), + mock.call('gsutil ls gs://test_bucket', stdout=-1, stderr=-1, shell=True), # nosec mock.call().communicate(), - mock.call('gsutil ls gs://test_bucket/**', stdout=-1, stderr=-1, shell=True), + mock.call('gsutil ls gs://test_bucket/**', stdout=-1, stderr=-1, shell=True), # nosec mock.call().communicate(), ]) mock_file_logger.info.assert_has_calls([ diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index bf0b39608f..4b7ddce463 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1044,7 +1044,7 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s self.assertSetEqual(set(response_json['sampleGuids']), {sample_guid, new_sample_guid}) # test correct file interactions - mock_subprocess.assert_called_with(f'gsutil cat {RNA_FILE_ID} | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) + mock_subprocess.assert_called_with(f'gsutil cat {RNA_FILE_ID} | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) # nosec filename = RNA_FILENAME_TEMPLATE.format(data_type) + f'__{new_sample_guid}.json.gz' expected_files = { f'{RNA_FILENAME_TEMPLATE.format(data_type)}__{new_sample_guid if sample_guid == PLACEHOLDER_GUID else sample_guid}.json.gz': data @@ -1160,7 +1160,7 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess): response = self.client.post(url, content_type='application/json', data=json.dumps(request_body)) self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'File not found: gs://seqr_data/lirical_data.tsv.gz') - mock_subprocess.assert_called_with('gsutil ls gs://seqr_data/lirical_data.tsv.gz', stdout=-1, stderr=-2, shell=True) + mock_subprocess.assert_called_with('gsutil ls gs://seqr_data/lirical_data.tsv.gz', stdout=-1, stderr=-2, shell=True) # nosec mock_subprocess.reset_mock() mock_subprocess.return_value.wait.return_value = 0 @@ 
-1168,14 +1168,14 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess): response = self.client.post(url, content_type='application/json', data=json.dumps(request_body)) self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'Invalid file: missing column(s) project, diseaseId') - mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) + mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) # nosec mock_subprocess.reset_mock() mock_subprocess.return_value.stdout = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_PROJECT_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps(request_body)) self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'Both sample ID and project fields are required.') - mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) + mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) # nosec mock_subprocess.return_value.stdout = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA + EXOMISER_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps(request_body)) @@ -1226,7 +1226,7 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess): saved_data = _get_json_for_models(PhenotypePrioritization.objects.filter(tool='lirical').order_by('id'), nested_fields=[{'fields': ('individual', 'guid'), 'key': 'individualGuid'}]) self.assertListEqual(saved_data, EXPECTED_LIRICAL_DATA) - mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) + mock_subprocess.assert_called_with('gsutil 
cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) # nosec # Test uploading new data self.reset_logs() @@ -1255,7 +1255,7 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess): @staticmethod def _ls_subprocess_calls(file, is_error=True): calls = [ - mock.call(f'gsutil ls {file}',stdout=-1, stderr=-2, shell=True), + mock.call(f'gsutil ls {file}',stdout=-1, stderr=-2, shell=True), # nosec mock.call().wait(), ] if is_error: diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 58fc92c1f0..235af48cf9 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -21,6 +21,7 @@ 's3': 'https://s3.amazonaws.com', 'gs': GS_STORAGE_URL, } +TIMEOUT = 300 def _process_alignment_records(rows, num_id_cols=1, **kwargs): @@ -192,7 +193,7 @@ def _stream_gs(request, gs_path): response = requests.get( f"{GS_STORAGE_URL}/{gs_path.replace('gs://', '', 1)}", headers=headers, - stream=True) + stream=True, timeout=TIMEOUT) return StreamingHttpResponse(response.iter_content(chunk_size=65536), status=response.status_code, content_type='application/octet-stream') @@ -212,7 +213,7 @@ def _get_gs_rest_api_headers(range_header, gs_path, user=None): def _get_token_expiry(token): response = requests.post('https://www.googleapis.com/oauth2/v1/tokeninfo', headers={'Content-Type': 'application/x-www-form-urlencoded'}, - data='access_token={}'.format(token)) + data='access_token={}'.format(token), timeout=30) if response.status_code == 200: result = json.loads(response.text) return result['expires_in'] @@ -259,7 +260,7 @@ def igv_genomes_proxy(request, cloud_host, file_path): if range_header: headers['Range'] = range_header - genome_response = requests.get(f'{CLOUD_STORAGE_URLS[cloud_host]}/{file_path}', headers=headers) + genome_response = requests.get(f'{CLOUD_STORAGE_URLS[cloud_host]}/{file_path}', headers=headers, timeout=TIMEOUT) proxy_response = HttpResponse( content=genome_response.content, 
status=genome_response.status_code, diff --git a/seqr/views/apis/igv_api_tests.py b/seqr/views/apis/igv_api_tests.py index 52d441611c..4de3bd6df5 100644 --- a/seqr/views/apis/igv_api_tests.py +++ b/seqr/views/apis/igv_api_tests.py @@ -50,8 +50,8 @@ def test_proxy_google_to_igv(self, mock_set_redis, mock_get_redis, mock_subproce mock_get_redis.assert_called_with(GS_STORAGE_ACCESS_CACHE_KEY) mock_set_redis.assert_called_with(GS_STORAGE_ACCESS_CACHE_KEY, 'token1', expire=3594) mock_subprocess.assert_has_calls([ - mock.call('gsutil -u anvil-datastorage ls gs://fc-secure-project_A/sample_1.bam.bai', stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True), - mock.call('gcloud auth print-access-token', stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True), + mock.call('gsutil -u anvil-datastorage ls gs://fc-secure-project_A/sample_1.bam.bai', stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True), # nosec + mock.call('gcloud auth print-access-token', stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True), # nosec ]) mock_ls_subprocess.wait.assert_called_once() mock_access_token_subprocess.wait.assert_called_once() @@ -88,7 +88,7 @@ def test_proxy_local_to_igv(self, mock_open, mock_subprocess): self.assertListEqual([val for val in response.streaming_content], STREAMING_READS_CONTENT) mock_subprocess.assert_called_with( 'dd skip=100 count=151 bs=1 if=/project_A/sample_1.bai status="none"', - stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) # nosec mock_open.assert_not_called() # test no byte range @@ -256,7 +256,7 @@ def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): set(response_json['individualsByGuid']['I000001_na19675']['igvSampleGuids']), {'S000145_na19675', sample_guid} ) - mock_subprocess.assert_called_with('gsutil ls gs://readviz/batch_10.dcr.bed.gz', stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) + 
mock_subprocess.assert_called_with('gsutil ls gs://readviz/batch_10.dcr.bed.gz', stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) # nosec response = self.client.post(url, content_type='application/json', data=json.dumps({ 'filePath': 'gs://readviz/batch_10.junctions.bed.gz', 'sampleId': 'NA19675', diff --git a/seqr/views/apis/project_api_tests.py b/seqr/views/apis/project_api_tests.py index 16068f5bd8..e321bcdea5 100644 --- a/seqr/views/apis/project_api_tests.py +++ b/seqr/views/apis/project_api_tests.py @@ -687,8 +687,8 @@ class AnvilProjectAPITest(AnvilAuthenticationTestCase, ProjectAPITest): PROJECT_COLLABORATOR_GROUPS = None HAS_EMPTY_PROJECT = False - def test_create_and_delete_project(self): - super(AnvilProjectAPITest, self).test_create_and_delete_project() + def test_create_and_delete_project(self, *args, **kwargs): + super(AnvilProjectAPITest, self).test_create_and_delete_project(*args, **kwargs) self.mock_list_workspaces.assert_not_called() self.mock_get_ws_acl.assert_not_called() self.mock_get_group_members.assert_not_called() diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index e80cab6477..ae464cf6d9 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -666,7 +666,7 @@ def _populate_gregor_files(file_data): def _load_data_model_validators(): - response = requests.get(GREGOR_DATA_MODEL_URL) + response = requests.get(GREGOR_DATA_MODEL_URL, timeout=10) response.raise_for_status() # remove commented out lines from json response_json = json.loads(re.sub('\\n\s*//.*\\n', '', response.text)) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index ebbc87d20b..2320228029 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1,5 +1,4 @@ from django.urls.base import reverse -from django.utils.dateparse import parse_datetime import json import mock import responses @@ -826,9 +825,9 @@ def test_gregor_export(self, 
mock_subprocess, mock_temp_dir, mock_open, mock_dat # test gsutil commands mock_subprocess.assert_has_calls([ - mock.call('gsutil ls gs://anvil-upload', stdout=-1, stderr=-2, shell=True), + mock.call('gsutil ls gs://anvil-upload', stdout=-1, stderr=-2, shell=True), # nosec mock.call().wait(), - mock.call('gsutil mv /mock/tmp/* gs://anvil-upload', stdout=-1, stderr=-2, shell=True), + mock.call('gsutil mv /mock/tmp/* gs://anvil-upload', stdout=-1, stderr=-2, shell=True), # nosec mock.call().wait(), ]) diff --git a/seqr/views/utils/airflow_utils.py b/seqr/views/utils/airflow_utils.py index e24e835d29..1e7c0c11b3 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -5,7 +5,6 @@ from google.auth.transport.requests import AuthorizedSession import itertools import json -import requests from reference_data.models import GENOME_VERSION_GRCh38, GENOME_VERSION_LOOKUP from seqr.models import Individual, Sample, Project diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index cb194ffd03..b88762aaed 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -1,6 +1,6 @@ from collections import defaultdict from datetime import datetime -from django.db.models import F, Q, Value, CharField, Case, When +from django.db.models import F, Q, Value, CharField from django.db.models.functions import Replace from django.contrib.auth.models import User from django.contrib.postgres.aggregates import ArrayAgg @@ -12,7 +12,6 @@ from seqr.models import Project, Family, Individual, Sample, SavedVariant, VariantTagType from seqr.views.utils.airtable_utils import get_airtable_samples from seqr.utils.gene_utils import get_genes -from seqr.utils.middleware import ErrorsWarningsException from seqr.utils.search.utils import get_search_samples from seqr.utils.xpos_utils import get_chrom_pos from seqr.views.utils.variant_utils import DISCOVERY_CATEGORY diff --git 
a/seqr/views/utils/terra_api_utils_tests.py b/seqr/views/utils/terra_api_utils_tests.py index b6a5a67e80..56d4b1099d 100644 --- a/seqr/views/utils/terra_api_utils_tests.py +++ b/seqr/views/utils/terra_api_utils_tests.py @@ -288,7 +288,7 @@ def test_get_anvil_group_members(self, mock_redis, mock_datetime, mock_credentia # test with service account credentials mock_datetime.now.return_value = datetime(2021, 1, 1) mock_credentials.expiry = datetime(2021, 1, 2) - mock_credentials.token = 'ya29.SA_EXAMPLE' + mock_credentials.token = 'ya29.SA_EXAMPLE' # nosec get_anvil_group_members(self.analyst_user, USERS_GROUP, use_sa_credentials=True) self.assertEqual(responses.calls[1].request.headers['Authorization'], 'Bearer ya29.SA_EXAMPLE') mock_credentials.refresh.assert_not_called() diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 9ce88a67a7..d34e9fb1e1 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -1494,7 +1494,7 @@ def _get_list_param(call, param): }, }) -GOOGLE_API_TOKEN_URL = 'https://oauth2.googleapis.com/token' -GOOGLE_ACCESS_TOKEN_URL = 'https://accounts.google.com/o/oauth2/token' +GOOGLE_API_TOKEN_URL = 'https://oauth2.googleapis.com/token' # nosec +GOOGLE_ACCESS_TOKEN_URL = 'https://accounts.google.com/o/oauth2/token' # nosec -GOOGLE_TOKEN_RESULT = '{"access_token":"ya29.c.EXAMPLE","expires_in":3599,"token_type":"Bearer"}' +GOOGLE_TOKEN_RESULT = '{"access_token":"ya29.c.EXAMPLE","expires_in":3599,"token_type":"Bearer"}' # nosec diff --git a/settings.py b/settings.py index a24598df77..b061521b87 100644 --- a/settings.py +++ b/settings.py @@ -1,7 +1,6 @@ import json import os import random -import re import string import subprocess # nosec From 00ec8a1176b1c0cfc0ec9a20d3458c6548a0a61f Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 15:07:58 -0400 Subject: [PATCH 073/736] move tools list to family api --- seqr/views/apis/family_api.py | 16 ++++++++++++++++ 
seqr/views/apis/family_api_tests.py | 11 ++++++++++- seqr/views/utils/orm_to_json_utils.py | 8 -------- seqr/views/utils/test_utils.py | 2 +- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 62e67e2eae..4cf77137a3 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -80,10 +80,26 @@ def family_page_data(request, family_guid): for individual_guid in outlier_individual_guids: response['individualsByGuid'][individual_guid]['hasRnaOutlierData'] = True + phentoype_prioritization_tools = ( + PhenotypePrioritization.objects.filter(individual__family=family).values( + 'individual__guid', + 'individual__phenotypeprioritization__tool', + 'individual__phenotypeprioritization__created_date' + ).distinct() + ) + pp_tools_by_indiv = defaultdict(list) + for pp in phentoype_prioritization_tools: + individual_guid = pp.get('individual__guid') + pp_tools_by_indiv[individual_guid].append({ + 'tool': pp.get('individual__phenotypeprioritization__tool'), + 'createdDate': pp.get('individual__phenotypeprioritization__created_date'), + }) + submissions = get_json_for_matchmaker_submissions(MatchmakerSubmission.objects.filter(individual__family=family)) individual_mme_submission_guids = {s['individualGuid']: s['submissionGuid'] for s in submissions} for individual in response['individualsByGuid'].values(): individual['mmeSubmissionGuid'] = individual_mme_submission_guids.get(individual['individualGuid']) + individual['phenotypePrioritizationTools'] = pp_tools_by_indiv.get(individual['individualGuid'], []) response['mmeSubmissionsByGuid'] = {s['submissionGuid']: s for s in submissions} return create_json_response(response) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 439a59f20e..64c8a03f47 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -69,9 +69,18 @@ def test_family_page_data(self): 
self.assertEqual(len(response_json['individualsByGuid']), 3) individual = response_json['individualsByGuid'][INDIVIDUAL_GUID] - individual_fields = {'sampleGuids', 'igvSampleGuids', 'mmeSubmissionGuid', 'hasRnaOutlierData'} + individual_fields = {'sampleGuids', 'igvSampleGuids', 'mmeSubmissionGuid', 'hasRnaOutlierData', + 'phenotypePrioritizationTools'} individual_fields.update(INDIVIDUAL_FIELDS) self.assertSetEqual(set(individual.keys()), individual_fields) + self.assertListEqual([[ + {'createdDate': '2024-05-02T06:42:55.397Z', 'tool': 'exomiser'}, + {'createdDate': '2024-05-02T06:42:55.397Z', 'tool': 'lirical'} + ], [ + {'createdDate': '2024-05-02T06:42:55.397Z', 'tool': 'lirical'} + ], []], + [response_json['individualsByGuid'][guid].get('phenotypePrioritizationTools') for guid in INDIVIDUAL_GUIDS] + ) self.assertListEqual( [True, False, True], [response_json['individualsByGuid'][guid].get('hasRnaOutlierData', False) for guid in INDIVIDUAL_GUIDS] diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index f5113879a4..fd6d277917 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -296,14 +296,6 @@ def _get_json_for_individuals(individuals, user=None, project_guid=None, family_ 'maternalId': F('mother__individual_id'), 'paternalId': F('father__individual_id'), 'displayName': INDIVIDUAL_DISPLAY_NAME_EXPR, - 'phenotypePrioritizationTools': ArrayAgg( - JSONObject( - tool='phenotypeprioritization__tool', - createdDate='phenotypeprioritization__created_date', - ), - distinct=True, - filter=Q(phenotypeprioritization__tool__isnull=False) - ) } if add_sample_guids_field: additional_values.update({ diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 1f03d8e234..9ce88a67a7 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -758,7 +758,7 @@ def _get_list_param(call, param): INDIVIDUAL_FIELDS = { 'projectGuid', 'familyGuid', 
'paternalId', 'maternalId', 'paternalGuid', 'maternalGuid', - 'features', 'absentFeatures', 'nonstandardFeatures', 'absentNonstandardFeatures', 'phenotypePrioritizationTools', + 'features', 'absentFeatures', 'nonstandardFeatures', 'absentNonstandardFeatures', } INDIVIDUAL_FIELDS.update(INDIVIDUAL_CORE_FIELDS) From 8634ff985ce7766a419499d365cec56deaec818e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 3 May 2024 15:13:35 -0400 Subject: [PATCH 074/736] simplify complex function --- seqr/views/apis/report_api.py | 95 ++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 40 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index ae464cf6d9..6155c4d1d1 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -355,19 +355,7 @@ def gregor_export(request): consent_code=consent_code[0], projectcategory__name=GREGOR_CATEGORY, ) - sample_types = Sample.objects.filter(individual__family__project__in=projects).values_list('individual_id', 'sample_type') - individual_data_types = defaultdict(set) - for individual_db_id, sample_type in sample_types: - individual_data_types[individual_db_id].add(sample_type) - individuals = Individual.objects.filter(id__in=individual_data_types).prefetch_related( - 'family__project', 'mother', 'father') - - grouped_data_type_individuals = defaultdict(dict) - family_individuals = defaultdict(dict) - for i in individuals: - participant_id = _format_gregor_id(i.individual_id) - grouped_data_type_individuals[participant_id].update({data_type: i for data_type in individual_data_types[i.id]}) - family_individuals[i.family_id][i.guid] = participant_id + grouped_data_type_individuals = _get_individual_data_types(projects) # If multiple individual records, prefer WGS individual_lookup = { @@ -415,38 +403,17 @@ def _add_row(row, family_id, row_type): experiment_lookup_rows = [] experiment_ids_by_participant = {} for participant in participant_rows: - # phenotype table - 
base_phenotype_row = {'participant_id': participant['participant_id'], 'presence': 'Present', 'ontology': 'HPO'} - phenotype_rows += [ - dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['features'] or [] - ] - base_phenotype_row['presence'] = 'Absent' - phenotype_rows += [ - dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['absent_features'] or [] - ] + phenotype_rows += _parse_participant_phenotype_rows(participant) if not participant[PARTICIPANT_ID_FIELD]: continue airtable_metadata = airtable_metadata_by_participant.get(participant[PARTICIPANT_ID_FIELD]) or {} - - has_analyte = False - # airtable data - for data_type in grouped_data_type_individuals[participant['participant_id']]: - if data_type not in airtable_metadata: - continue - is_rna, row = _get_airtable_row(data_type, airtable_metadata) - has_analyte = True - analyte_rows.append({**participant, **row}) - if not is_rna: - experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id'] - (airtable_rna_rows if is_rna else airtable_rows).append(row) - experiment_lookup_rows.append( - {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} - ) - - if participant['analyte_id'] and not has_analyte: - analyte_rows.append(participant) + data_types = grouped_data_type_individuals[participant['participant_id']] + _parse_participant_airtable_rows( + participant, airtable_metadata, data_types, experiment_ids_by_participant, + analyte_rows, airtable_rows, airtable_rna_rows, experiment_lookup_rows, + ) # Add experiment IDs for variant in genetic_findings_rows: @@ -478,6 +445,54 @@ def _add_row(row, family_id, row_type): }) +def _get_individual_data_types(projects): + sample_types = Sample.objects.filter(individual__family__project__in=projects).values_list('individual_id', 'sample_type') + individual_data_types = defaultdict(set) + for individual_db_id, sample_type in 
sample_types: + individual_data_types[individual_db_id].add(sample_type) + individuals = Individual.objects.filter(id__in=individual_data_types).prefetch_related( + 'family__project', 'mother', 'father') + + grouped_data_type_individuals = defaultdict(dict) + for i in individuals: + participant_id = _format_gregor_id(i.individual_id) + grouped_data_type_individuals[participant_id].update( + {data_type: i for data_type in individual_data_types[i.id]}) + return grouped_data_type_individuals + + +def _parse_participant_phenotype_rows(participant): + base_phenotype_row = {'participant_id': participant['participant_id'], 'presence': 'Present', 'ontology': 'HPO'} + present_rows = [ + dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['features'] or [] + ] + base_phenotype_row['presence'] = 'Absent' + return present_rows + [ + dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['absent_features'] or [] + ] + + +def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant, + analyte_rows, airtable_rows, airtable_rna_rows, experiment_lookup_rows): + has_analyte = False + # airtable data + for data_type in data_types: + if data_type not in airtable_metadata: + continue + is_rna, row = _get_airtable_row(data_type, airtable_metadata) + has_analyte = True + analyte_rows.append({**participant, **row}) + if not is_rna: + experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id'] + (airtable_rna_rows if is_rna else airtable_rows).append(row) + experiment_lookup_rows.append( + {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} + ) + + if participant['analyte_id'] and not has_analyte: + analyte_rows.append(participant) + + def _get_gregor_airtable_data(participants, user): session = AirtableSession(user) From 6dc11cda1a07ac4953aeb2f16cdfcd417c94c075 Mon Sep 17 00:00:00 2001 From: Julia 
Klugherz Date: Fri, 3 May 2024 15:13:55 -0400 Subject: [PATCH 075/736] add nosec --- generated_files/media/pedigree_images/new_ped_image_123.png | 0 .../media/pedigree_images/new_ped_image_123_DXFoqK3.png | 0 .../media/pedigree_images/new_ped_image_123_H6Es8HD.png | 0 .../media/pedigree_images/new_ped_image_123_TFm2uxY.png | 0 .../media/pedigree_images/new_ped_image_123_b4i0Il0.png | 0 .../media/pedigree_images/new_ped_image_123_t854KDU.png | 0 seqr/views/apis/data_manager_api.py | 2 +- 7 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 generated_files/media/pedigree_images/new_ped_image_123.png create mode 100644 generated_files/media/pedigree_images/new_ped_image_123_DXFoqK3.png create mode 100644 generated_files/media/pedigree_images/new_ped_image_123_H6Es8HD.png create mode 100644 generated_files/media/pedigree_images/new_ped_image_123_TFm2uxY.png create mode 100644 generated_files/media/pedigree_images/new_ped_image_123_b4i0Il0.png create mode 100644 generated_files/media/pedigree_images/new_ped_image_123_t854KDU.png diff --git a/generated_files/media/pedigree_images/new_ped_image_123.png b/generated_files/media/pedigree_images/new_ped_image_123.png new file mode 100644 index 0000000000..e69de29bb2 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_DXFoqK3.png b/generated_files/media/pedigree_images/new_ped_image_123_DXFoqK3.png new file mode 100644 index 0000000000..e69de29bb2 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_H6Es8HD.png b/generated_files/media/pedigree_images/new_ped_image_123_H6Es8HD.png new file mode 100644 index 0000000000..e69de29bb2 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_TFm2uxY.png b/generated_files/media/pedigree_images/new_ped_image_123_TFm2uxY.png new file mode 100644 index 0000000000..e69de29bb2 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_b4i0Il0.png b/generated_files/media/pedigree_images/new_ped_image_123_b4i0Il0.png new 
file mode 100644 index 0000000000..e69de29bb2 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_t854KDU.png b/generated_files/media/pedigree_images/new_ped_image_123_t854KDU.png new file mode 100644 index 0000000000..e69de29bb2 diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index c95f707c08..d0980e3097 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -409,7 +409,7 @@ def load_phenotype_prioritization_data(request): for indiv_records in all_records_by_project_name.values(): for record in indiv_records: model = PhenotypePrioritization(**record) - model.guid = f'PP{random.randint(10 ** 8, 10 ** 9)}_{model.individual.individual_id}_{model.gene_id}_{model.disease_id}'[:PhenotypePrioritization.MAX_GUID_SIZE] + model.guid = f'PP{random.randint(10 ** 8, 10 ** 9)}_{model.individual.individual_id}_{model.gene_id}_{model.disease_id}'[:PhenotypePrioritization.MAX_GUID_SIZE] # nosec models_to_create.append(model) PhenotypePrioritization.bulk_create(request.user, models_to_create) From 5098013d4d89673d7bc3dd87086f90c1472ae61f Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 15:14:21 -0400 Subject: [PATCH 076/736] oops --- generated_files/media/pedigree_images/new_ped_image_123.png | 0 .../media/pedigree_images/new_ped_image_123_DXFoqK3.png | 0 .../media/pedigree_images/new_ped_image_123_H6Es8HD.png | 0 .../media/pedigree_images/new_ped_image_123_TFm2uxY.png | 0 .../media/pedigree_images/new_ped_image_123_b4i0Il0.png | 0 .../media/pedigree_images/new_ped_image_123_t854KDU.png | 0 6 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 generated_files/media/pedigree_images/new_ped_image_123.png delete mode 100644 generated_files/media/pedigree_images/new_ped_image_123_DXFoqK3.png delete mode 100644 generated_files/media/pedigree_images/new_ped_image_123_H6Es8HD.png delete mode 100644 
generated_files/media/pedigree_images/new_ped_image_123_TFm2uxY.png delete mode 100644 generated_files/media/pedigree_images/new_ped_image_123_b4i0Il0.png delete mode 100644 generated_files/media/pedigree_images/new_ped_image_123_t854KDU.png diff --git a/generated_files/media/pedigree_images/new_ped_image_123.png b/generated_files/media/pedigree_images/new_ped_image_123.png deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_DXFoqK3.png b/generated_files/media/pedigree_images/new_ped_image_123_DXFoqK3.png deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_H6Es8HD.png b/generated_files/media/pedigree_images/new_ped_image_123_H6Es8HD.png deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_TFm2uxY.png b/generated_files/media/pedigree_images/new_ped_image_123_TFm2uxY.png deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_b4i0Il0.png b/generated_files/media/pedigree_images/new_ped_image_123_b4i0Il0.png deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/generated_files/media/pedigree_images/new_ped_image_123_t854KDU.png b/generated_files/media/pedigree_images/new_ped_image_123_t854KDU.png deleted file mode 100644 index e69de29bb2..0000000000 From 7eac3682f128d0f484303095c398b12e925c4a41 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 15:23:59 -0400 Subject: [PATCH 077/736] remove created from json response --- seqr/models.py | 2 +- seqr/views/apis/data_manager_api_tests.py | 28 +++++++++++------------ seqr/views/apis/family_api_tests.py | 12 +++------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index 933bf73e44..e9ec7268eb 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1212,4 +1212,4 @@ def 
_compute_guid(self): return 'PP%07d_%s' % (self.id, _slugify(str(self))) class Meta: - json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'disease_name', 'scores', 'created_date', 'created_by'] + json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'disease_name', 'scores'] diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 4203b2d538..8b874a2d01 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -363,28 +363,28 @@ EXPECTED_LIRICAL_DATA = [ {'diseaseId': 'OMIM:219801', 'geneId': 'ENSG00000268904', 'diseaseName': 'Cystinosis, no syndrome', - 'scores': {'compositeLR': 0.003, 'post_test_probability': 0.1}, 'createdBy': None, - 'tool': 'lirical', 'rank': 11, 'individualGuid': 'I000001_na19675', 'createdDate': ANY}, # record from the fixture + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0.1}, + 'tool': 'lirical', 'rank': 11, 'individualGuid': 'I000001_na19675'}, # record from the fixture {'diseaseId': 'OMIM:618460', 'geneId': 'ENSG00000105357', 'diseaseName': 'Khan-Khan-Katsanis syndrome', - 'scores': {'compositeLR': 0.066, 'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', - 'tool': 'lirical', 'rank': 1, 'individualGuid': 'I000002_na19678', 'createdDate': ANY}, + 'scores': {'compositeLR': 0.066, 'postTestProbability': 0.0}, + 'tool': 'lirical', 'rank': 1, 'individualGuid': 'I000002_na19678'}, {'diseaseId': 'OMIM:219800', 'geneId': 'ENSG00000105357', 'diseaseName': 'Cystinosis, nephropathic', - 'scores': {'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', - 'tool': 'lirical', 'rank': 2, 'individualGuid': 'I000015_na20885', 'createdDate': ANY}, + 'scores': {'postTestProbability': 0.0}, + 'tool': 'lirical', 'rank': 2, 'individualGuid': 'I000015_na20885'} ] EXPECTED_UPDATED_LIRICAL_DATA = [ {'diseaseId': 'OMIM:219801', 'geneId': 'ENSG00000268904', 'diseaseName': 'Cystinosis, no syndrome', - 'scores': {'compositeLR': 
0.003, 'post_test_probability': 0.1}, 'createdBy': None, - 'tool': 'lirical', 'rank': 11, 'individualGuid': 'I000001_na19675', 'createdDate': ANY}, # record from the fixture + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0.1}, + 'tool': 'lirical', 'rank': 11, 'individualGuid': 'I000001_na19675'}, # record from the fixture {'diseaseId': 'OMIM:219800', 'geneId': 'ENSG00000105357', 'diseaseName': 'Cystinosis, nephropathic', - 'scores': {'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', - 'tool': 'lirical', 'rank': 2, 'individualGuid': 'I000015_na20885', 'createdDate': ANY}, + 'scores': {'postTestProbability': 0.0}, + 'tool': 'lirical', 'rank': 2, 'individualGuid': 'I000015_na20885'}, {'diseaseId': 'OMIM:618460', 'geneId': 'ENSG00000105357', 'diseaseName': 'Khan-Khan-Katsanis syndrome', - 'scores': {'compositeLR': 0.066, 'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', - 'tool': 'lirical', 'rank': 3, 'individualGuid': 'I000002_na19678', 'createdDate': ANY}, + 'scores': {'compositeLR': 0.066, 'postTestProbability': 0.0}, + 'tool': 'lirical', 'rank': 3, 'individualGuid': 'I000002_na19678'}, {'diseaseId': 'OMIM:219800', 'geneId': 'ENSG00000105357', 'diseaseName': 'Cystinosis, nephropathic', - 'scores': {'compositeLR': 0.003, 'postTestProbability': 0.0}, 'createdBy': 'Test Data Manager', - 'tool': 'lirical', 'rank': 4, 'individualGuid': 'I000002_na19678', 'createdDate': ANY}, + 'scores': {'compositeLR': 0.003, 'postTestProbability': 0.0}, + 'tool': 'lirical', 'rank': 4, 'individualGuid': 'I000002_na19678'} ] PEDIGREE_HEADER = ['Project_GUID', 'Family_GUID', 'Family_ID', 'Individual_ID', 'Paternal_ID', 'Maternal_ID', 'Sex'] diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 64c8a03f47..fecccf577b 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -582,14 +582,10 @@ def test_get_family_phenotype_gene_scores(self): 'exomiser': [ {'diseaseId': 
'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 2, 'scores': {'exomiser_score': 0.969347946, 'phenotype_score': 0.443567539, - 'variant_score': 0.999200702}, - 'createdDate': '2024-05-02T06:42:55.397Z', 'createdBy': None - }, + 'variant_score': 0.999200702}}, {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'rank': 1, 'scores': {'exomiser_score': 0.977923765, 'phenotype_score': 0.603998205, - 'variant_score': 1}, - 'createdDate': '2024-05-02T06:42:55.397Z', 'createdBy': None - }, + 'variant_score': 1}}, ] } }, @@ -597,9 +593,7 @@ def test_get_family_phenotype_gene_scores(self): 'ENSG00000268903': { 'lirical': [ {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 1, - 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}, - 'createdDate': '2024-05-02T06:42:55.397Z', 'createdBy': None, - } + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}} ] } } From ee480427b85089b7d1cd720a6ba03e5ba86b9595 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 15:25:45 -0400 Subject: [PATCH 078/736] a few more --- seqr/views/apis/data_manager_api_tests.py | 2 +- seqr/views/apis/family_api_tests.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 8b874a2d01..d789a4c934 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -384,7 +384,7 @@ 'tool': 'lirical', 'rank': 3, 'individualGuid': 'I000002_na19678'}, {'diseaseId': 'OMIM:219800', 'geneId': 'ENSG00000105357', 'diseaseName': 'Cystinosis, nephropathic', 'scores': {'compositeLR': 0.003, 'postTestProbability': 0.0}, - 'tool': 'lirical', 'rank': 4, 'individualGuid': 'I000002_na19678'} + 'tool': 'lirical', 'rank': 4, 'individualGuid': 'I000002_na19678'}, ] PEDIGREE_HEADER = ['Project_GUID', 'Family_GUID', 'Family_ID', 'Individual_ID', 'Paternal_ID', 'Maternal_ID', 
'Sex'] diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index fecccf577b..cf04264c64 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -585,7 +585,7 @@ def test_get_family_phenotype_gene_scores(self): 'variant_score': 0.999200702}}, {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'rank': 1, 'scores': {'exomiser_score': 0.977923765, 'phenotype_score': 0.603998205, - 'variant_score': 1}}, + 'variant_score': 1}} ] } }, @@ -593,7 +593,8 @@ def test_get_family_phenotype_gene_scores(self): 'ENSG00000268903': { 'lirical': [ {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 1, - 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}} + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0} + } ] } } From aa0ef6fd2b73ca63e35c1224e30f6a0f4c969c2c Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 15:29:31 -0400 Subject: [PATCH 079/736] a single space --- seqr/views/apis/family_api_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index cf04264c64..7292d6a43d 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -594,7 +594,7 @@ def test_get_family_phenotype_gene_scores(self): 'lirical': [ {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 1, 'scores': {'compositeLR': 0.003, 'post_test_probability': 0} - } + } ] } } From 829a71959a53e74e3920c31081d35d2f2fca6b3a Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 15:34:29 -0400 Subject: [PATCH 080/736] take out the extras --- ui/pages/Project/components/FamilyTable/IndividualRow.jsx | 4 ++-- ui/pages/Project/components/PhenotypePrioritizedGenes.jsx | 8 +------- ui/pages/Project/constants.js | 5 ----- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git 
a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index df6c8a8340..9a9977d7e7 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -31,7 +31,7 @@ import { getSamplesByGuid, getMmeSubmissionsByGuid } from 'redux/selectors' import { HPO_FORM_FIELDS } from '../HpoTerms' import { CASE_REVIEW_STATUS_MORE_INFO_NEEDED, CASE_REVIEW_STATUS_OPTIONS, CASE_REVIEW_TABLE_NAME, INDIVIDUAL_DETAIL_FIELDS, - ONSET_AGE_OPTIONS, INHERITANCE_MODE_OPTIONS, INHERITANCE_MODE_LOOKUP, AR_FIELDS, PHENOTYPE_PRIORITIZATION_FIELDS, + ONSET_AGE_OPTIONS, INHERITANCE_MODE_OPTIONS, INHERITANCE_MODE_LOOKUP, AR_FIELDS } from '../../constants' import { updateIndividuals } from '../../reducers' import { getCurrentProject, getParentOptionsByIndividual } from '../../selectors' @@ -549,7 +549,7 @@ class IndividualRow extends React.PureComponent { loadedSamples = loadedSamples.filter((sample, i) => sample.isActive || i === 0 || i === loadedSamples.length - 1) const phenotypePrioritizationTools = individual.phenotypePrioritizationTools.map( - pp => ({ sampleType: PHENOTYPE_PRIORITIZATION_FIELDS[pp.tool], loadedDate: pp.createdDate }), + pp => ({ sampleType: pp.tool.charAt(0).toUpperCase() + pp.tool.slice(1), loadedDate: pp.createdDate }), ) const leftContent = ( diff --git a/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx b/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx index 1121577c97..6eeae095a0 100644 --- a/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx +++ b/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx @@ -9,7 +9,6 @@ import { BaseVariantGene } from 'shared/components/panel/variants/VariantGene' import { camelcaseToTitlecase } from 'shared/utils/stringUtils' import { loadPhenotypeGeneScores } from '../reducers' import { getPhenotypeDataLoading, getIndividualPhenotypeGeneScores } from '../selectors' 
-import { PHENOTYPE_PRIORITIZATION_FIELDS } from '../constants' const PHENOTYPE_GENE_INFO_COLUMNS = [ { @@ -28,12 +27,7 @@ const PHENOTYPE_GENE_INFO_COLUMNS = [ /> ), }, - { - name: 'tool', - width: 1, - content: 'Tool', - format: ({ tool }) => PHENOTYPE_PRIORITIZATION_FIELDS[tool], - }, + { name: 'tool', width: 1, content: 'Tool' }, { name: 'diseaseName', width: 5, diff --git a/ui/pages/Project/constants.js b/ui/pages/Project/constants.js index 1316295cca..f63465019d 100644 --- a/ui/pages/Project/constants.js +++ b/ui/pages/Project/constants.js @@ -504,8 +504,3 @@ export const TAG_FORM_FIELD = { parse: value => (value || []).map(name => ({ name })), validate: value => (value && value.length ? undefined : 'Required'), } - -export const PHENOTYPE_PRIORITIZATION_FIELDS = { - exomiser: 'Exomiser', - lirical: 'LIRICAL', -} From 973d79c76b6d38ee28be88a41a41ff72c4cd2812 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 May 2024 15:36:52 -0400 Subject: [PATCH 081/736] lint --- ui/pages/Project/components/FamilyTable/IndividualRow.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index 9a9977d7e7..5e1e828597 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -31,7 +31,7 @@ import { getSamplesByGuid, getMmeSubmissionsByGuid } from 'redux/selectors' import { HPO_FORM_FIELDS } from '../HpoTerms' import { CASE_REVIEW_STATUS_MORE_INFO_NEEDED, CASE_REVIEW_STATUS_OPTIONS, CASE_REVIEW_TABLE_NAME, INDIVIDUAL_DETAIL_FIELDS, - ONSET_AGE_OPTIONS, INHERITANCE_MODE_OPTIONS, INHERITANCE_MODE_LOOKUP, AR_FIELDS + ONSET_AGE_OPTIONS, INHERITANCE_MODE_OPTIONS, INHERITANCE_MODE_LOOKUP, AR_FIELDS, } from '../../constants' import { updateIndividuals } from '../../reducers' import { getCurrentProject, getParentOptionsByIndividual } from '../../selectors' From 
e40229cb4f35bfb6bef8771b13044c464686f09f Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Fri, 3 May 2024 20:28:40 +0000 Subject: [PATCH 082/736] fix: requirements.txt to reduce vulnerabilities The following vulnerabilities are fixed by pinning transitive dependencies: - https://snyk.io/vuln/SNYK-PYTHON-TQDM-6807582 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4cc64b5e1e..8f070eb5fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -171,7 +171,7 @@ sqlparse==0.5.0 # via django swapper==1.3.0 # via django-notifications-hq -tqdm==4.64.1 +tqdm==4.66.3 # via -r requirements.in urllib3==1.26.18 # via From 04a836ff6eeb59a1390e49116be4e282093b9136 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 6 May 2024 11:22:02 -0400 Subject: [PATCH 083/736] test notifications --- CHANGELOG.md | 1 + seqr/views/apis/data_manager_api.py | 4 ++-- seqr/views/apis/data_manager_api_tests.py | 7 +++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5164fd92bc..396d28ea63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## dev * Adds dynamic analysis groups (REQUIRES DB MIGRATION) +* Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION) ## 4/4/24 * Add ability to import project metadata from gregor metadata diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 5cbfb4eabf..95b7e29df8 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -337,12 +337,12 @@ def _notify_phenotype_prioritization_loaded(project, tool, num_samples, user): url = f'{BASE_URL}project/{project.guid}/project_page' project_link = f'{project.name}' email = ( - f'This is to notify you that {tool.title()} data for {num_samples} samples ' + f'This is to notify you that {tool.title()} data for {num_samples} sample(s) ' f'has been loaded in seqr project 
{project_link} by {user.get_full_name()}' ) send_project_notification( project, - notification=f'Loaded {num_samples} {tool.title()} samples', + notification=f'Loaded {num_samples} {tool.title()} sample(s)', email=email, subject=f'New {tool.title()} data available in seqr', ) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index db880889f1..2ebd5e77d0 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1265,15 +1265,15 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema ]) @staticmethod - def _assert_expected_notifications(mock_send_email, notification_info: list[dict]): + def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict]): calls = [] - for notif_dict in notification_info: + for notif_dict in expected_notifs: project_guid = notif_dict.get('project_guid', PROJECT_GUID) project_name = notif_dict.get('project_name', '1kg project nåme with uniçøde') url = f'{SEQR_URL}project/{project_guid}/project_page' project_link = f'{project_name}' email = ( - f'This is to notify you that {notif_dict["tool"].title()} data for {notif_dict["num_samples"]} samples ' + f'This is to notify you that {notif_dict["tool"].title()} data for {notif_dict["num_samples"]} sample(s) ' f'has been loaded in seqr project {project_link} by {notif_dict["user"].get_full_name()}' ) calls.append(mock.call( @@ -1281,7 +1281,6 @@ def _assert_expected_notifications(mock_send_email, notification_info: list[dict subject=f'New {notif_dict["tool"].title()} data available in seqr', to=['test_user_manager@test.com'], process_message=ANY, )) - mock_send_email.assert_has_calls(calls) @staticmethod From 7570f67e4e583e56edbfa7f95995c705801669ba Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Mon, 6 May 2024 13:09:47 -0400 Subject: [PATCH 084/736] Allow optional "dataset_version" in GitHub action. 
(#4044) * Conditionalize dataset version * Format message correctly --------- Co-authored-by: hanars --- .../hail-search-persistent-volume-snapshot-release.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/hail-search-persistent-volume-snapshot-release.yaml b/.github/workflows/hail-search-persistent-volume-snapshot-release.yaml index 6e4e0bf0d9..a342308546 100644 --- a/.github/workflows/hail-search-persistent-volume-snapshot-release.yaml +++ b/.github/workflows/hail-search-persistent-volume-snapshot-release.yaml @@ -24,7 +24,7 @@ on: - SV required: true version: - required: true + required: false volume_handle: required: true @@ -42,6 +42,7 @@ jobs: fetch-depth: 0 - name: update dataset version in the broad-seqr chart + if: "${{ inputs.version != '' }}" uses: mikefarah/yq@v4.22.1 with: cmd: > @@ -61,4 +62,4 @@ jobs: github_token: ${{ secrets.SEQR_VERSION_UPDATE_TOKEN }} author_email: ${{ github.actor }}@users.noreply.github.com author_name: tgg-automation - message: "Updating ${{ inputs.environment }} ${{ inputs.reference_genome }}/${{ inputs.dataset_type }} dataset version to ${{ inputs.version }} and volume handle to ${{ inputs.volume_handle }} " + message: "Updating ${{ inputs.environment }} ${{ inputs.reference_genome }}/${{ inputs.dataset_type }} ${{ inputs.version != '' && format('{0} {1} {2}', 'dataset version to', inputs.version, 'and') || ''}} volume handle to ${{ inputs.volume_handle }} " From c26f7331d8401a4b2b705442976c7dd6fa8f0de9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 May 2024 14:39:34 -0400 Subject: [PATCH 085/736] only show update transcript button to users with edit permission --- ui/shared/components/panel/variants/Transcripts.jsx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index c4374661f7..fc1a0523f7 100644 --- 
a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -4,7 +4,7 @@ import styled from 'styled-components' import { connect } from 'react-redux' import { Label, Header, Table, Segment } from 'semantic-ui-react' -import { getGenesById, getTranscriptsById } from 'redux/selectors' +import { getGenesById, getTranscriptsById, getFamiliesByGuid, getProjectsByGuid } from 'redux/selectors' import { updateVariantMainTranscript } from 'redux/rootReducer' import { VerticalSpacer } from '../../Spacers' import DispatchRequestButton from '../../buttons/DispatchRequestButton' @@ -42,7 +42,7 @@ const TRANSCRIPT_LABELS = [ }, ] -const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMainTranscript }) => ( +const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMainTranscript, project }) => ( variant.transcripts && Object.entries(variant.transcripts).sort((transcriptsA, transcriptsB) => ( Math.min(...transcriptsA[1].map(t => t.transcriptRank)) - Math.min(...transcriptsB[1].map(t => t.transcriptRank)) )).map(([geneId, geneTranscripts]) => ( @@ -79,7 +79,7 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai ) ))} { - variant.variantGuid && ( + variant.variantGuid && project?.canEdit && ( { @@ -142,11 +142,13 @@ Transcripts.propTypes = { genesById: PropTypes.object.isRequired, transcriptsById: PropTypes.object.isRequired, updateMainTranscript: PropTypes.func.isRequired, + project: PropTypes.object, } -const mapStateToProps = state => ({ +const mapStateToProps = (state, ownProps) => ({ genesById: getGenesById(state), transcriptsById: getTranscriptsById(state), + project: getProjectsByGuid(state)[getFamiliesByGuid(state)[ownProps.variant.familyGuids[0]]?.projectGuid], }) const mapDispatchToProps = (dispatch, ownProps) => ({ From f1e97202a12ff5701bffa4c2486839d4304a5c8f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 May 2024 14:45:45 -0400 Subject: 
[PATCH 086/736] fix unit test --- ui/shared/components/panel/variants/Transcripts.test.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/shared/components/panel/variants/Transcripts.test.js b/ui/shared/components/panel/variants/Transcripts.test.js index 234d49f9b8..648bb6ac5e 100644 --- a/ui/shared/components/panel/variants/Transcripts.test.js +++ b/ui/shared/components/panel/variants/Transcripts.test.js @@ -4,12 +4,12 @@ import Adapter from '@wojtekmaj/enzyme-adapter-react-17' import configureStore from 'redux-mock-store' import Transcripts from './Transcripts' -import { STATE1, GENE } from '../fixtures' +import { STATE1, GENE, VARIANT } from '../fixtures' configure({ adapter: new Adapter() }) test('shallow-render without crashing', () => { const store = configureStore()(STATE1) - shallow() + shallow() }) From 8fafc2a1afe44c1991574fd54beaa9c65e2fb8c4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 May 2024 17:23:53 -0400 Subject: [PATCH 087/736] shared logic for comupte_guid --- seqr/models.py | 86 ++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 49 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index aa89945fd6..5a7706b623 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -56,6 +56,8 @@ def __new__(cls, name, bases, attrs, **kwargs): class ModelWithGUID(models.Model, metaclass=CustomModelBase): MAX_GUID_SIZE = 30 + GUID_PREFIX = '' + GUID_PRECISION = 7 guid = models.CharField(max_length=MAX_GUID_SIZE, db_index=True, unique=True) @@ -72,13 +74,11 @@ class Meta: internal_json_fields = [] audit_fields = set() - @abstractmethod + def _format_guid(self, id): + return f'{self.GUID_PREFIX}{id:0{self.GUID_PRECISION}d}_{_slugify(str(self))}'[:self.MAX_GUID_SIZE] + def _compute_guid(self): - """Returns a human-readable label (aka. slug) for this object with only alphanumeric - chars, '-' and '_'. 
This label doesn't need to be globally unique by itself, but should not - be null or blank, and should be globally unique when paired with this object's created-time - in seconds. - """ + return self._format_guid(self.id) def __unicode__(self): return self.guid @@ -112,7 +112,7 @@ def save(self, *args, **kwargs): self.created_date = kwargs.pop('created_date', current_time) super(ModelWithGUID, self).save(*args, **kwargs) - self.guid = self._compute_guid()[:ModelWithGUID.MAX_GUID_SIZE] + self.guid = self._compute_guid() super(ModelWithGUID, self).save() def delete_model(self, user, user_can_delete=False): @@ -208,8 +208,8 @@ class Project(ModelWithGUID): def __unicode__(self): return self.name.strip() - def _compute_guid(self): - return 'R%04d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'R' + GUID_PRECISION = 4 def save(self, *args, **kwargs): """Override the save method and create user permissions groups + add the created_by user. @@ -271,8 +271,8 @@ class ProjectCategory(ModelWithGUID): def __unicode__(self): return self.name.strip() - def _compute_guid(self): - return 'PC%06d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'PC' + GUID_PRECISION = 6 class Family(ModelWithGUID): @@ -355,8 +355,8 @@ class Family(ModelWithGUID): def __unicode__(self): return self.family_id.strip() - def _compute_guid(self): - return 'F%06d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'F' + GUID_PRECISION = 6 class Meta: unique_together = ('project', 'family_id') @@ -386,8 +386,8 @@ class FamilyAnalysedBy(ModelWithGUID): def __unicode__(self): return '{}_{}_{}'.format(self.family.guid, self.created_by, self.data_type) - def _compute_guid(self): - return 'FAB%06d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'FAB' + GUID_PRECISION = 6 class Meta: json_fields = ['last_modified_date', 'created_by', 'data_type'] @@ -407,8 +407,8 @@ class FamilyNote(ModelWithGUID): def __unicode__(self): return '{}_{}_{}'.format(self.family.family_id, self.note_type, 
self.note)[:20] - def _compute_guid(self): - return 'FAN{:06d}_{}'.format(self.id, _slugify(str(self))) + GUID_PREFIX = 'FAN' + GUID_PRECISION = 6 class Meta: json_fields = ['guid', 'note', 'note_type', 'last_modified_date', 'created_by'] @@ -632,8 +632,7 @@ class Individual(ModelWithGUID): def __unicode__(self): return self.individual_id.strip() - def _compute_guid(self): - return 'I%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'I' def save(self, *args, **kwargs): if Individual.objects.filter(individual_id=self.individual_id, family__project_id=self.family.project_id).count() > 1: @@ -714,8 +713,8 @@ class Sample(ModelWithGUID): def __unicode__(self): return self.sample_id.strip() - def _compute_guid(self): - return 'S%010d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'S' + GUID_PRECISION = 10 class Meta: json_fields = [ @@ -747,8 +746,8 @@ class IgvSample(ModelWithGUID): def __unicode__(self): return self.file_path.split('/')[-1].split('.')[0].strip() - def _compute_guid(self): - return 'S%010d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'S' + GUID_PRECISION = 10 class Meta: unique_together = ('individual', 'sample_type') @@ -774,8 +773,7 @@ def __unicode__(self): chrom, pos = get_chrom_pos(self.xpos) return "%s:%s-%s" % (chrom, pos, self.family.guid) - def _compute_guid(self): - return 'SV%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'SV' class Meta: unique_together = ('xpos', 'xpos_end', 'variant_id', 'family') @@ -810,8 +808,8 @@ class VariantTagType(ModelWithGUID): def __unicode__(self): return self.name.strip() - def _compute_guid(self): - return 'VTT%05d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'VTT' + GUID_PRECISION = 5 class Meta: unique_together = ('project', 'name', 'color') @@ -831,8 +829,7 @@ def __unicode__(self): saved_variants_ids = "".join(str(saved_variant) for saved_variant in self.saved_variants.all()) return "%s:%s" % (saved_variants_ids, self.variant_tag_type.name) - def 
_compute_guid(self): - return 'VT%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'VT' class Meta: json_fields = ['guid', 'search_hash', 'metadata', 'last_modified_date', 'created_by'] @@ -850,8 +847,7 @@ def __unicode__(self): saved_variants_ids = "".join(str(saved_variant) for saved_variant in self.saved_variants.all()) return "%s:%s" % (saved_variants_ids, (self.note or "")[:20]) - def _compute_guid(self): - return 'VN%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'VN' class Meta: json_fields = ['guid', 'note', 'submit_to_clinvar', 'last_modified_date', 'created_by'] @@ -944,8 +940,7 @@ def __unicode__(self): saved_variants_ids = "".join(str(saved_variant) for saved_variant in self.saved_variants.all()) return "%s:%s" % (saved_variants_ids, self.functional_data_tag) - def _compute_guid(self): - return 'VFD%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'VFD' class Meta: json_fields = ['guid', 'functional_data_tag', 'metadata', 'last_modified_date', 'created_by'] @@ -958,8 +953,7 @@ class GeneNote(ModelWithGUID): def __unicode__(self): return "%s:%s" % (self.gene_id, (self.note or "")[:20]) - def _compute_guid(self): - return 'GN%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'GN' class Meta: json_fields = ['guid', 'note', 'gene_id', 'last_modified_date', 'created_by'] @@ -977,8 +971,8 @@ class LocusList(ModelWithGUID): def __unicode__(self): return self.name.strip() - def _compute_guid(self): - return 'LL%05d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'LL' + GUID_PRECISION = 5 class Meta: unique_together = ('name', 'description', 'is_public', 'created_by') @@ -994,8 +988,7 @@ class LocusListGene(ModelWithGUID): def __unicode__(self): return "%s:%s" % (self.locus_list, self.gene_id) - def _compute_guid(self): - return 'LLG%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'LLG' class Meta: unique_together = ('locus_list', 'gene_id') @@ -1012,8 +1005,7 @@ class LocusListInterval(ModelWithGUID): def 
__unicode__(self): return "%s:%s:%s-%s" % (self.locus_list, self.chrom, self.start, self.end) - def _compute_guid(self): - return 'LLI%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'LLI' class Meta: unique_together = ('locus_list', 'genome_version', 'chrom', 'start', 'end') @@ -1031,8 +1023,7 @@ class AnalysisGroup(ModelWithGUID): def __unicode__(self): return self.name.strip() - def _compute_guid(self): - return 'AG%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'AG' class Meta: unique_together = ('project', 'name') @@ -1048,8 +1039,7 @@ class DynamicAnalysisGroup(ModelWithGUID): def __unicode__(self): return self.name.strip() - def _compute_guid(self): - return 'DAG%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'DAG' class Meta: unique_together = ('project', 'name') @@ -1065,8 +1055,7 @@ class VariantSearch(ModelWithGUID): def __unicode__(self): return self.name or str(self.id) - def _compute_guid(self): - return 'VS%07d_%s' % (self.id, _slugify(self.name or '')) + GUID_PREFIX = 'VS' class Meta: unique_together = ('created_by', 'name') @@ -1082,8 +1071,7 @@ class VariantSearchResults(ModelWithGUID): def __unicode__(self): return self.search_hash - def _compute_guid(self): - return 'VSR%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'VSR' class BulkOperationBase(models.Model): From e6ec2246bb8f67a6a51731448b3ecb8d1d7d9493 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 May 2024 17:40:23 -0400 Subject: [PATCH 088/736] add guid computation to bulk create helper --- seqr/models.py | 1 + seqr/views/apis/summary_data_api.py | 2 -- seqr/views/utils/variant_utils.py | 4 +--- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index 5a7706b623..8a63ab4ff6 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -127,6 +127,7 @@ def bulk_create(cls, user, new_models): """Helper bulk create method that logs the creation""" for model in new_models: model.created_by = user + model.guid = 
model._format_guid(randint(10**(cls.GUID_PRECISION-1), 10**cls.GUID_PRECISION)) # nosec models = cls.objects.bulk_create(new_models) log_model_bulk_update(logger, models, user, 'create') return models diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 4af43c1bee..10568c4fe5 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -173,8 +173,6 @@ def bulk_update_family_external_analysis(request): FamilyAnalysedBy(family_id=family_db_id_lookup[family_key], data_type=data_type, last_modified_date=datetime.now()) for family_key in requested_families if family_key in family_db_id_lookup ] - for ab in analysed_by_models: - ab.guid = f'FAB{randint(10**5, 10**6)}_{ab}'[:FamilyAnalysedBy.MAX_GUID_SIZE] # nosec FamilyAnalysedBy.bulk_create(request.user, analysed_by_models) return create_json_response({ diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 9598b92748..86d364a05c 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -148,9 +148,7 @@ def bulk_create_tagged_variants(family_variant_data, tag_name, get_metadata, use new_variant_models = [] for (family_id, variant_id), variant in new_variant_data.items(): create_json, update_json = parse_saved_variant_json(variant, family_id, variant_id=variant_id) - variant_model = SavedVariant(**create_json, **update_json) - variant_model.guid = f'SV{str(variant_model)}'[:SavedVariant.MAX_GUID_SIZE] - new_variant_models.append(variant_model) + new_variant_models.append(SavedVariant(**create_json, **update_json)) saved_variant_map.update({ (v.family_id, v.variant_id): v for v in SavedVariant.bulk_create(user, new_variant_models) From 405ce1619d694e400cdd26649b675035ef837fe5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 May 2024 17:56:52 -0400 Subject: [PATCH 089/736] remove guid generation from sample model creation --- seqr/views/utils/dataset_utils.py | 21 
++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 3fa826a840..5f5dd7b421 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -3,7 +3,6 @@ from django.db.models import Count, F, Q from django.utils import timezone from tqdm import tqdm -import random from seqr.models import Sample, Individual, Family, Project, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier from seqr.utils.communication_utils import safe_post_to_slack @@ -60,7 +59,8 @@ def _find_or_create_samples( matched_individual_ids = {sample['individual_id'] for sample in existing_samples.values()} loaded_date = timezone.now() - samples = {**existing_samples} + samples_guids = [sample['guid'] for sample in existing_samples.values()] + individual_ids = {sample['individual_id'] for sample in existing_samples.values()} if len(remaining_sample_keys) > 0: remaining_individuals_dict = _get_individuals_by_key(projects, matched_individual_ids) @@ -88,14 +88,16 @@ def _find_or_create_samples( sample_key: _get_new_sample_args(sample_key, individual) for sample_key, individual in sample_id_to_individual_record.items() } - samples.update(new_sample_args) - _create_samples( + individual_ids.update({sample['individual_id'] for sample in new_sample_args.values()}) + sample_models = _create_samples( new_sample_args.values(), user, loaded_date=loaded_date, **sample_params, ) - return samples, remaining_sample_keys, loaded_date + samples_guids += list(sample_models.values_list('guid', flat=True)) + + return samples_guids, individual_ids, remaining_sample_keys, loaded_date def _create_samples(sample_data, user, loaded_date=timezone.now(), **kwargs): @@ -106,7 +108,7 @@ def _create_samples(sample_data, user, loaded_date=timezone.now(), **kwargs): **created_sample_data, **kwargs, ) for created_sample_data in sorted(sample_data, key=lambda s: s['guid'])] - Sample.bulk_create(user, 
new_samples) + return Sample.bulk_create(user, new_samples) def _get_matched_samples_by_key(projects, key_fields=None, values=None, **sample_params): @@ -135,7 +137,6 @@ def _get_individual_key(sample_key, sample_id_to_individual_id_mapping): def _get_new_sample_args(sample_key, individual_data, key_fields=None): return { - 'guid': f'S{random.randint(10 ** 9, 10 ** 10)}_{individual_data["individual_id"]}'[:Sample.MAX_GUID_SIZE], # nosec 'individual_id': individual_data['id'], 'sample_id': sample_key[0], **{key_field: sample_key[i+2] for i, key_field in enumerate(key_fields or [])} @@ -193,7 +194,7 @@ def match_and_update_search_samples( projects, sample_project_tuples, sample_type, dataset_type, sample_data, user, expected_families=None, sample_id_to_individual_id_mapping=None, raise_unmatched_error_template='Matches not found for sample ids: {sample_ids}', ): - samples, remaining_sample_keys, loaded_date = _find_or_create_samples( + samples_guids, individual_ids, remaining_sample_keys, loaded_date = _find_or_create_samples( sample_project_tuples=sample_project_tuples, projects=projects, user=user, @@ -206,8 +207,6 @@ def match_and_update_search_samples( sample_data=sample_data, ) - samples_guids = [sample['guid'] for sample in samples.values()] - individual_ids = {sample['individual_id'] for sample in samples.values()} included_families = dict(Family.objects.filter(individual__id__in=individual_ids).values_list('guid', 'analysis_status')) _validate_samples_families(samples_guids, included_families.keys(), sample_type, dataset_type, expected_families=expected_families) @@ -489,7 +488,7 @@ def get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id if sample_key not in samples_to_create and sample_key not in unmatched_samples: individual_key = _get_individual_key(sample_key, sample_id_to_individual_id_mapping) if individual_key in individual_data_by_key: - samples_to_create[sample_key] = _get_new_sample_args( + samples_to_create[sample_key] = 
_get_new_sample_args( # TODO sample_key, individual_data_by_key[individual_key], key_fields=['tissue_type'], ) else: From 5e690f7244f159e8e0667d81903221758298f7f7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 May 2024 18:04:42 -0400 Subject: [PATCH 090/736] fix syntax --- seqr/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/models.py b/seqr/models.py index 8a63ab4ff6..747c08aee1 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -127,7 +127,7 @@ def bulk_create(cls, user, new_models): """Helper bulk create method that logs the creation""" for model in new_models: model.created_by = user - model.guid = model._format_guid(randint(10**(cls.GUID_PRECISION-1), 10**cls.GUID_PRECISION)) # nosec + model.guid = model._format_guid(random.randint(10**(cls.GUID_PRECISION-1), 10**cls.GUID_PRECISION)) # nosec models = cls.objects.bulk_create(new_models) log_model_bulk_update(logger, models, user, 'create') return models From 63281fc19370a0ec1c3d6e9e075d3f69940c583a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 May 2024 18:45:28 -0400 Subject: [PATCH 091/736] migrate rna smple creation to guid computation --- seqr/management/commands/load_rna_seq.py | 15 ++++---- seqr/views/apis/data_manager_api.py | 24 ++++++++---- seqr/views/utils/dataset_utils.py | 48 +++++++++++++----------- 3 files changed, 52 insertions(+), 35 deletions(-) diff --git a/seqr/management/commands/load_rna_seq.py b/seqr/management/commands/load_rna_seq.py index d592fefdad..c7b126aea9 100644 --- a/seqr/management/commands/load_rna_seq.py +++ b/seqr/management/commands/load_rna_seq.py @@ -29,22 +29,23 @@ def handle(self, *args, **options): config = RNA_DATA_TYPE_CONFIGS[data_type] model_cls = config['model_class'] - sample_data_by_guid = defaultdict(list) + sample_data_by_key = defaultdict(list) - def _save_sample_data(sample_guid, row): - sample_data_by_guid[sample_guid].append(row) + def _save_sample_data(sample_key, row): + 
sample_data_by_key[sample_key].append(row) - possible_sample_guids, _, _ = load_rna_seq( + possible_sample_guids_to_keys, _, _ = load_rna_seq( data_type, options['input_file'], _save_sample_data, mapping_file=mapping_file, ignore_extra_samples=options['ignore_extra_samples']) sample_models_by_guid = { - s.guid: s for s in Sample.objects.filter(guid__in=sample_data_by_guid) + s.guid: s for s in Sample.objects.filter(guid__in=possible_sample_guids_to_keys) } errors = [] sample_guids = [] - for sample_guid in possible_sample_guids: - data_rows, error = post_process_rna_data(sample_guid, sample_data_by_guid[sample_guid], **config.get('post_process_kwargs', {})) + for sample_guid in possible_sample_guids_to_keys: + sample_key = possible_sample_guids_to_keys[sample_guid] + data_rows, error = post_process_rna_data(sample_guid, sample_data_by_key[sample_key], **config.get('post_process_kwargs', {})) if error: errors.append(error) continue diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 9a7d9d4962..6cd7cf8163 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -276,27 +276,37 @@ def update_rna_seq(request): sample_files = {} - def _save_sample_data(sample_guid, sample_data): - if sample_guid not in sample_files: - file_name = os.path.join(get_temp_upload_directory(), _get_sample_file_name(file_name_prefix, sample_guid)) - sample_files[sample_guid] = gzip.open(file_name, 'at') - sample_files[sample_guid].write(f'{json.dumps(sample_data)}\n') + def _save_sample_data(sample_key, sample_data): + if sample_key not in sample_files: + file_name = _get_sample_file_path(file_name_prefix, sample_key.join('_')) + sample_files[sample_key] = gzip.open(file_name, 'at') + sample_files[sample_key].write(f'{json.dumps(sample_data)}\n') try: - sample_guids, info, warnings = load_rna_seq( + sample_guids_to_keys, info, warnings = load_rna_seq( data_type, file_path, _save_sample_data, user=request.user, 
mapping_file=mapping_file, ignore_extra_samples=request_json.get('ignoreExtraSamples')) except ValueError as e: return create_json_response({'error': str(e)}, status=400) + for sample_guid, sample_key in sample_guids_to_keys.items(): + os.rename( + _get_sample_file_path(file_name_prefix, sample_key.join('_')), + _get_sample_file_path(file_name_prefix, sample_guid), + ) + return create_json_response({ 'info': info, 'warnings': warnings, 'fileName': file_name_prefix, - 'sampleGuids': sorted(sample_guids), + 'sampleGuids': sorted(sample_guids_to_keys.keys()), }) +def _get_sample_file_path(file_name_prefix, sample_guid): + return os.path.join(get_temp_upload_directory(), _get_sample_file_name(file_name_prefix, sample_guid)) + + def _get_sample_file_name(file_name_prefix, sample_guid): return f'{file_name_prefix}__{sample_guid}.json.gz' diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 5f5dd7b421..2cad2177a2 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -89,13 +89,13 @@ def _find_or_create_samples( for sample_key, individual in sample_id_to_individual_record.items() } individual_ids.update({sample['individual_id'] for sample in new_sample_args.values()}) - sample_models = _create_samples( + new_sample_models = _create_samples( new_sample_args.values(), user, loaded_date=loaded_date, **sample_params, ) - samples_guids += list(sample_models.values_list('guid', flat=True)) + samples_guids += list(new_sample_models.values_list('guid', flat=True)) return samples_guids, individual_ids, remaining_sample_keys, loaded_date @@ -112,12 +112,16 @@ def _create_samples(sample_data, user, loaded_date=timezone.now(), **kwargs): def _get_matched_samples_by_key(projects, key_fields=None, values=None, **sample_params): + return _get_sample_models_by_key(samples=Sample.objects.filter( + individual__family__project__in=projects, + **sample_params + ), key_fields=key_fields, values=values) + + +def 
_get_sample_models_by_key(samples, key_fields=None, values=None): return { (s.pop('sample_id'), s.pop('individual__family__project__name'), *[s[field] for field in (key_fields or [])]): s - for s in Sample.objects.filter( - individual__family__project__in=projects, - **sample_params - ).values('guid', 'individual_id', 'sample_id', 'tissue_type', 'individual__family__project__name', **(values or {})) + for s in samples.values('guid', 'individual_id', 'sample_id', 'tissue_type', 'individual__family__project__name', **(values or {})) } @@ -378,7 +382,7 @@ def _load_rna_seq_file( if any(row_gene_ids): gene_ids.update(row_gene_ids) - sample_guid = get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id_mapping) + get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id_mapping) if missing_required_fields or (unmatched_samples and not ignore_extra_samples) or (sample_key in unmatched_samples): # If there are definite errors, do not process/save data, just continue to check for additional errors @@ -386,7 +390,7 @@ def _load_rna_seq_file( for gene_id in row_gene_ids: row_dict = {**row_dict, GENE_ID_COL: gene_id} - save_sample_data(sample_guid, row_dict) + save_sample_data(sample_key, row_dict) errors, warnings = _process_rna_errors( gene_ids, missing_required_fields, unmatched_samples, ignore_extra_samples, loaded_samples, @@ -431,9 +435,10 @@ def _load_rna_seq(model_cls, file_path, save_data, *args, user=None, **kwargs): projects = get_internal_projects() data_source = file_path.split('/')[-1].split('_-_')[-1] + key_fields = ['tissue_type'] potential_samples = _get_matched_samples_by_key( projects, sample_type=Sample.SAMPLE_TYPE_RNA, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS, - key_fields=['tissue_type'], values={ + key_fields=key_fields, values={ 'dataSource': F('data_source'), 'model_count': Count(model_cls.__name__.lower()), 'active': F('is_active'), @@ -443,19 +448,21 @@ def _load_rna_seq(model_cls, file_path, save_data, 
*args, user=None, **kwargs): individual_data_by_key = _get_individuals_by_key(projects) prev_loaded_individual_ids = set() - sample_guids_to_load = set() + sample_guid_keys_to_load = set() existing_samples_by_guid = {} samples_to_create = {} def update_sample_models(): if samples_to_create: - _create_samples( + new_sample_models = _create_samples( samples_to_create.values(), user=user, data_source=data_source, sample_type=Sample.SAMPLE_TYPE_RNA, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS, ) + sample_key_map = _get_sample_models_by_key(new_sample_models, key_fields=key_fields) + sample_guid_keys_to_load.update({s['guid']: sample_key for sample_key, s in sample_key_map.items()}) # Delete old data to_delete_sample_individuals = { @@ -471,45 +478,44 @@ def update_sample_models(): for guid in to_delete_sample_individuals: existing_samples_by_guid[guid]['dataSource'] = data_source - def save_sample_data(sample_guid, sample_data): + def save_sample_data(sample_key, sample_data): if not sample_data: return - sample_guids_to_load.add(sample_guid) - save_data(sample_guid, sample_data) + save_data(sample_key, sample_data) def get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id_mapping): if sample_key in potential_samples: sample = potential_samples[sample_key] sample_guid = sample['guid'] existing_samples_by_guid[sample_guid] = sample - return sample_guid + sample_guid_keys_to_load[sample_guid] = sample_key if sample_key not in samples_to_create and sample_key not in unmatched_samples: individual_key = _get_individual_key(sample_key, sample_id_to_individual_id_mapping) if individual_key in individual_data_by_key: - samples_to_create[sample_key] = _get_new_sample_args( # TODO + samples_to_create[sample_key] = _get_new_sample_args( sample_key, individual_data_by_key[individual_key], key_fields=['tissue_type'], ) else: unmatched_samples.add(sample_key) - return samples_to_create.get(sample_key, {}).get('guid') + return None warnings, not_loaded_count 
= _load_rna_seq_file( file_path, user, potential_loaded_samples, update_sample_models, save_sample_data, get_matched_sample, *args, **kwargs) - message = f'Parsed {len(sample_guids_to_load) + not_loaded_count} RNA-seq samples' + message = f'Parsed {len(sample_guid_keys_to_load) + not_loaded_count} RNA-seq samples' info = [message] logger.info(message, user) - sample_projects = Project.objects.filter(family__individual__sample__guid__in=sample_guids_to_load).values( + sample_projects = Project.objects.filter(family__individual__sample__guid__in=sample_guid_keys_to_load).values( 'guid', 'name', new_sample_ids=ArrayAgg( 'family__individual__sample__sample_id', distinct=True, ordering='family__individual__sample__sample_id', filter=~Q(family__individual__id__in=prev_loaded_individual_ids) if prev_loaded_individual_ids else None )) project_names = ', '.join(sorted([project['name'] for project in sample_projects])) - message = f'Attempted data loading for {len(sample_guids_to_load)} RNA-seq samples in the following {len(sample_projects)} projects: {project_names}' + message = f'Attempted data loading for {len(sample_guid_keys_to_load)} RNA-seq samples in the following {len(sample_projects)} projects: {project_names}' info.append(message) logger.info(message, user) @@ -518,7 +524,7 @@ def get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id for warning in warnings: logger.warning(warning, user) - return sample_guids_to_load, info, warnings + return sample_guid_keys_to_load, info, warnings def post_process_rna_data(sample_guid, data, get_unique_key=None, format_fields=None): From b2054dfae8c25894a1075f1ae26fed2a2be4cf3a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 7 May 2024 11:37:19 -0400 Subject: [PATCH 092/736] fix syntax --- seqr/views/utils/dataset_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 2cad2177a2..becc5ef9c5 100644 --- 
a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -448,7 +448,7 @@ def _load_rna_seq(model_cls, file_path, save_data, *args, user=None, **kwargs): individual_data_by_key = _get_individuals_by_key(projects) prev_loaded_individual_ids = set() - sample_guid_keys_to_load = set() + sample_guid_keys_to_load = {} existing_samples_by_guid = {} samples_to_create = {} From 05fd830b3b9449c8b50478d359baddfede0ee234 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 7 May 2024 12:26:08 -0400 Subject: [PATCH 093/736] fix updates and tests for rna loading --- seqr/utils/logging_utils.py | 2 +- seqr/views/apis/data_manager_api.py | 4 ++-- seqr/views/apis/data_manager_api_tests.py | 15 +++++++++++---- seqr/views/utils/dataset_utils.py | 8 ++++---- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/seqr/utils/logging_utils.py b/seqr/utils/logging_utils.py index 88ca198ff2..1f594a380e 100644 --- a/seqr/utils/logging_utils.py +++ b/seqr/utils/logging_utils.py @@ -77,7 +77,7 @@ def log_model_bulk_update(logger, models, user, update_type, update_fields=None) if not models: return [] db_entity = type(models[0]).__name__ - entity_ids = [o.guid for o in models] + entity_ids = sorted([o.guid for o in models]) db_update = { 'dbEntity': db_entity, 'entityIds': entity_ids, 'updateType': 'bulk_{}'.format(update_type), } diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 6cd7cf8163..9abd25276d 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -278,7 +278,7 @@ def update_rna_seq(request): def _save_sample_data(sample_key, sample_data): if sample_key not in sample_files: - file_name = _get_sample_file_path(file_name_prefix, sample_key.join('_')) + file_name = _get_sample_file_path(file_name_prefix, '_'.join(sample_key)) sample_files[sample_key] = gzip.open(file_name, 'at') sample_files[sample_key].write(f'{json.dumps(sample_data)}\n') @@ -291,7 +291,7 @@ def 
_save_sample_data(sample_key, sample_data): for sample_guid, sample_key in sample_guids_to_keys.items(): os.rename( - _get_sample_file_path(file_name_prefix, sample_key.join('_')), + _get_sample_file_path(file_name_prefix, '_'.join(sample_key)), _get_sample_file_path(file_name_prefix, sample_guid), ) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 4b7ddce463..f19636b027 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1051,10 +1051,10 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s for sample_guid, data in params['parsed_file_data'].items() } self.assertIn(filename, expected_files) - mock_open.assert_has_calls([mock.call(filename, 'at') for filename in expected_files]) + file_rename = self._assert_expected_file_open(mock_os, mock_open, expected_files.keys()) for filename in expected_files: self.assertEqual( - ''.join([call.args[0] for call in mock_files[filename].write.call_args_list]), + ''.join([call.args[0] for call in mock_files[file_rename[filename]].write.call_args_list]), expected_files[filename], ) @@ -1077,6 +1077,7 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s # Test loading data when where an individual has multiple tissue types data = [data[1][:2] + data[0][2:], data[1]] mock_files = defaultdict(mock.MagicMock) + mock_os.reset_mock() new_sample_individual_id = 7 response_json, new_sample_guid = _test_basic_data_loading(data, 2, 2, new_sample_individual_id, body, '1kg project nåme with uniçøde') @@ -1086,8 +1087,8 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s ) self.assertTrue(second_tissue_sample_guid != new_sample_guid) self.assertTrue(second_tissue_sample_guid in response_json['sampleGuids']) - mock_open.assert_has_calls([ - mock.call(f'{RNA_FILENAME_TEMPLATE.format(data_type)}__{sample_guid}.json.gz', 'at') + 
self._assert_expected_file_open(mock_os, mock_open, [ + f'{RNA_FILENAME_TEMPLATE.format(data_type)}__{sample_guid}.json.gz' for sample_guid in response_json['sampleGuids'] ]) self.assertSetEqual( @@ -1095,6 +1096,12 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s params['write_data'], ) + def _assert_expected_file_open(self, mock_os, mock_open, expected_file_names): + file_rename = {call.args[1]: call.args[0] for call in mock_os.rename.call_args_list} + self.assertSetEqual(set(expected_file_names), set(file_rename.keys())) + mock_open.assert_has_calls([mock.call(file_rename[filename], 'at') for filename in expected_file_names]) + return file_rename + @mock.patch('seqr.views.apis.data_manager_api.os') @mock.patch('seqr.views.apis.data_manager_api.gzip.open') def test_load_rna_seq_sample_data(self, mock_open, mock_os): diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index becc5ef9c5..5126401e64 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -107,7 +107,7 @@ def _create_samples(sample_data, user, loaded_date=timezone.now(), **kwargs): loaded_date=loaded_date, **created_sample_data, **kwargs, - ) for created_sample_data in sorted(sample_data, key=lambda s: s['guid'])] + ) for created_sample_data in sample_data] return Sample.bulk_create(user, new_samples) @@ -461,7 +461,8 @@ def update_sample_models(): sample_type=Sample.SAMPLE_TYPE_RNA, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS, ) - sample_key_map = _get_sample_models_by_key(new_sample_models, key_fields=key_fields) + new_sample_ids = [s.id for s in new_sample_models] + sample_key_map = _get_sample_models_by_key(Sample.objects.filter(id__in=new_sample_ids), key_fields=key_fields) sample_guid_keys_to_load.update({s['guid']: sample_key for sample_key, s in sample_key_map.items()}) # Delete old data @@ -490,6 +491,7 @@ def get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id 
sample_guid = sample['guid'] existing_samples_by_guid[sample_guid] = sample sample_guid_keys_to_load[sample_guid] = sample_key + return if sample_key not in samples_to_create and sample_key not in unmatched_samples: individual_key = _get_individual_key(sample_key, sample_id_to_individual_id_mapping) @@ -500,8 +502,6 @@ def get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id else: unmatched_samples.add(sample_key) - return None - warnings, not_loaded_count = _load_rna_seq_file( file_path, user, potential_loaded_samples, update_sample_models, save_sample_data, get_matched_sample, *args, **kwargs) From 62b1870952d7c81014fe313eea87b72bcf66926c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 7 May 2024 12:34:45 -0400 Subject: [PATCH 094/736] fix updates and tests for snp loading --- seqr/views/apis/dataset_api_tests.py | 10 +++++----- seqr/views/utils/dataset_utils.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/seqr/views/apis/dataset_api_tests.py b/seqr/views/apis/dataset_api_tests.py index 3b26ffc9c2..4ac5cc10e0 100644 --- a/seqr/views/apis/dataset_api_tests.py +++ b/seqr/views/apis/dataset_api_tests.py @@ -51,7 +51,7 @@ @mock.patch('seqr.utils.file_utils.open', MOCK_OPEN) class DatasetAPITest(object): - @mock.patch('seqr.views.utils.dataset_utils.random.randint') + @mock.patch('seqr.models.random.randint') @mock.patch('seqr.utils.search.add_data_utils.safe_post_to_slack') @mock.patch('seqr.utils.communication_utils.send_html_email') @mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_URL', MOCK_AIRTABLE_URL) @@ -109,8 +109,8 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando response_json = response.json() self.assertSetEqual(set(response_json.keys()), {'samplesByGuid', 'individualsByGuid', 'familiesByGuid'}) - new_sample_guid = 'S98765432101234567890_NA20878' - replaced_sample_guid = 'S98765432101234567890_NA19678' + new_sample_guid = 'S98765432101234567890_na20878' + 
replaced_sample_guid = 'S98765432101234567890_na19678_' self.assertSetEqual( set(response_json['samplesByGuid'].keys()), {existing_sample_guid, existing_old_index_sample_guid, replaced_sample_guid, new_sample_guid} @@ -191,7 +191,7 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando response_json = response.json() self.assertSetEqual(set(response_json.keys()), {'samplesByGuid', 'individualsByGuid', 'familiesByGuid'}) - sv_sample_guid = 'S1234567_NA19675_1' + sv_sample_guid = 'S0001234567_na19675_1' self.assertDictEqual(response_json['familiesByGuid'], {}) self.assertListEqual(list(response_json['samplesByGuid'].keys()), [sv_sample_guid]) self.assertEqual(response_json['samplesByGuid'][sv_sample_guid]['datasetType'], 'SV') @@ -239,7 +239,7 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando response_json = response.json() self.assertSetEqual(set(response_json.keys()), {'samplesByGuid', 'individualsByGuid', 'familiesByGuid'}) - new_sample_type_sample_guid = 'S987654_NA19675_1' + new_sample_type_sample_guid = 'S0000987654_na19675_1' self.assertDictEqual(response_json['familiesByGuid'], {}) self.assertListEqual(list(response_json['samplesByGuid'].keys()), [new_sample_type_sample_guid]) self.assertEqual(response_json['samplesByGuid'][new_sample_type_sample_guid]['datasetType'], 'SNV_INDEL') diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 5126401e64..7e2ffd7d17 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -95,7 +95,7 @@ def _find_or_create_samples( loaded_date=loaded_date, **sample_params, ) - samples_guids += list(new_sample_models.values_list('guid', flat=True)) + samples_guids += [s.guid for s in new_sample_models] return samples_guids, individual_ids, remaining_sample_keys, loaded_date From bc7dd6929ab2a49f62d23f421cfd83046b22780e Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 7 May 2024 12:41:10 -0400 
Subject: [PATCH 095/736] most review comments --- .../0064_alter_phenotypeprioritization.py | 4 +-- seqr/views/apis/data_manager_api.py | 6 ++--- seqr/views/apis/data_manager_api_tests.py | 2 +- seqr/views/apis/family_api.py | 27 +++++++++---------- ui/shared/utils/constants.js | 3 +-- 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/seqr/migrations/0064_alter_phenotypeprioritization.py b/seqr/migrations/0064_alter_phenotypeprioritization.py index 0645d5a81f..b3538361e9 100644 --- a/seqr/migrations/0064_alter_phenotypeprioritization.py +++ b/seqr/migrations/0064_alter_phenotypeprioritization.py @@ -18,12 +18,10 @@ class Migration(migrations.Migration): def update_guids(apps, schema_editor): PhenotypePrioritization = apps.get_model('seqr', 'PhenotypePrioritization') db_alias = schema_editor.connection.alias - pps = PhenotypePrioritization.objects.using(db_alias).all() for pp in pps: ids_as_str = "%s:%s:%s" % (pp.individual.individual_id, pp.gene_id, pp.disease_id) pp.guid = 'PP%07d_%s' % (pp.id, _slugify(str(ids_as_str)))[:MAX_GUID_SIZE] - PhenotypePrioritization.objects.using(db_alias).bulk_update(pps, ['guid']) operations = [ @@ -48,7 +46,7 @@ def update_guids(apps, schema_editor): name='last_modified_date', field=models.DateTimeField(blank=True, db_index=True, null=True), ), - migrations.RunPython(update_guids), + migrations.RunPython(update_guids, reverse_code=migrations.RunPython.noop), # Add uniqueness constraint to guid after default is replaced by update_guids migrations.AlterField( model_name='phenotypeprioritization', diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 95b7e29df8..9e44c40f03 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -333,12 +333,12 @@ def load_rna_seq_sample_data(request, sample_guid): return create_json_response({'success': True}) -def _notify_phenotype_prioritization_loaded(project, tool, num_samples, user): +def 
_notify_phenotype_prioritization_loaded(project, tool, num_samples): url = f'{BASE_URL}project/{project.guid}/project_page' project_link = f'{project.name}' email = ( f'This is to notify you that {tool.title()} data for {num_samples} sample(s) ' - f'has been loaded in seqr project {project_link} by {user.get_full_name()}' + f'has been loaded in seqr project {project_link}' ) send_project_notification( project, @@ -417,7 +417,7 @@ def load_phenotype_prioritization_data(request): for project_name, indiv_records in all_records_by_project_name.items(): project = projects_by_name[project_name][0] num_samples = len(indiv_records) - _notify_phenotype_prioritization_loaded(project, tool, num_samples, request.user) + _notify_phenotype_prioritization_loaded(project, tool, num_samples) return create_json_response({ 'info': info, diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 4812137540..10c770a713 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1274,7 +1274,7 @@ def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict]) project_link = f'{project_name}' email = ( f'This is to notify you that {notif_dict["tool"].title()} data for {notif_dict["num_samples"]} sample(s) ' - f'has been loaded in seqr project {project_link} by {notif_dict["user"].get_full_name()}' + f'has been loaded in seqr project {project_link}' ) calls.append(mock.call( email_body=f'Dear seqr user,\n\n{email}\n\nAll the best,\nThe seqr team', diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 4cf77137a3..d2dd83c23f 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -4,8 +4,10 @@ import json from collections import defaultdict from django.contrib.auth.models import User +from django.contrib.postgres.aggregates import ArrayAgg from django.db.models import Count, Q from django.db.models.fields.files import 
ImageFieldFile +from django.db.models.functions import JSONObject from matchmaker.models import MatchmakerSubmission from reference_data.models import Omim @@ -80,26 +82,21 @@ def family_page_data(request, family_guid): for individual_guid in outlier_individual_guids: response['individualsByGuid'][individual_guid]['hasRnaOutlierData'] = True - phentoype_prioritization_tools = ( - PhenotypePrioritization.objects.filter(individual__family=family).values( - 'individual__guid', - 'individual__phenotypeprioritization__tool', - 'individual__phenotypeprioritization__created_date' - ).distinct() - ) - pp_tools_by_indiv = defaultdict(list) - for pp in phentoype_prioritization_tools: - individual_guid = pp.get('individual__guid') - pp_tools_by_indiv[individual_guid].append({ - 'tool': pp.get('individual__phenotypeprioritization__tool'), - 'createdDate': pp.get('individual__phenotypeprioritization__created_date'), - }) + tools_by_indiv = {} + tools_agg = PhenotypePrioritization.objects.filter(individual__family=family).values('individual__guid').annotate( + phenotypePrioritizationTools=ArrayAgg( + JSONObject(tool='tool', createdDate='created_date'), + distinct=True, + )) + for indiv_record in tools_agg: + individual_guid = indiv_record.get('individual__guid') + tools_by_indiv[individual_guid] = indiv_record.get('phenotypePrioritizationTools') submissions = get_json_for_matchmaker_submissions(MatchmakerSubmission.objects.filter(individual__family=family)) individual_mme_submission_guids = {s['individualGuid']: s['submissionGuid'] for s in submissions} for individual in response['individualsByGuid'].values(): individual['mmeSubmissionGuid'] = individual_mme_submission_guids.get(individual['individualGuid']) - individual['phenotypePrioritizationTools'] = pp_tools_by_indiv.get(individual['individualGuid'], []) + individual['phenotypePrioritizationTools'] = tools_by_indiv.get(individual['individualGuid'], []) response['mmeSubmissionsByGuid'] = {s['submissionGuid']: s for s in 
submissions} return create_json_response(response) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index ef9f4f1c4e..78d74f7ebd 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -379,8 +379,7 @@ export const CATEGORY_FAMILY_FILTERS = { { value: `${SHOW_DATA_LOADED}_PHENO`, name: 'Data Loaded - Phenotype Prioritization', - // eslint-disable-next-line no-unused-vars - createFilter: (family, _user, _samplesByFamily) => family.hasPhenotypePrioritization, + createFilter: family => family.hasPhenotypePrioritization, }, ], } From 3a512c7a44193c949be4e27500625d7789258789 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 7 May 2024 12:52:53 -0400 Subject: [PATCH 096/736] fix test --- seqr/views/apis/family_api_tests.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 7292d6a43d..e5786b6544 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -73,12 +73,14 @@ def test_family_page_data(self): 'phenotypePrioritizationTools'} individual_fields.update(INDIVIDUAL_FIELDS) self.assertSetEqual(set(individual.keys()), individual_fields) - self.assertListEqual([[ - {'createdDate': '2024-05-02T06:42:55.397Z', 'tool': 'exomiser'}, - {'createdDate': '2024-05-02T06:42:55.397Z', 'tool': 'lirical'} - ], [ - {'createdDate': '2024-05-02T06:42:55.397Z', 'tool': 'lirical'} - ], []], + self.assertListEqual([ + [ + {'createdDate': '2024-05-02T06:42:55.397+00:00', 'tool': 'exomiser'}, + {'createdDate': '2024-05-02T06:42:55.397+00:00', 'tool': 'lirical'} + ], [ + {'createdDate': '2024-05-02T06:42:55.397+00:00', 'tool': 'lirical'} + ], [] + ], [response_json['individualsByGuid'][guid].get('phenotypePrioritizationTools') for guid in INDIVIDUAL_GUIDS] ) self.assertListEqual( From 804407d1405bd75da57e2c524a1c98bfbd5435aa Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 7 May 2024 14:10:48 
-0400 Subject: [PATCH 097/736] update mock sample guids --- .../tests/check_for_new_samples_from_pipeline_tests.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index a7d2dc8501..61d88c8bb2 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -14,9 +14,9 @@ MOCK_HAIL_HOST = 'http://test-hail-host' GUID_ID = 54321 -NEW_SAMPLE_GUID_P3 = f'S{GUID_ID}_NA20888' -NEW_SAMPLE_GUID_P4 = f'S{GUID_ID}_NA21234' -REPLACED_SAMPLE_GUID = f'S{GUID_ID}_NA20885' +NEW_SAMPLE_GUID_P3 = f'S00000{GUID_ID}_na20888' +NEW_SAMPLE_GUID_P4 = f'S00000{GUID_ID}_na21234' +REPLACED_SAMPLE_GUID = f'S00000{GUID_ID}_na20885' EXISTING_SAMPLE_GUID = 'S000154_na20889' EXISTING_WGS_SAMPLE_GUID = 'S000144_na20888' EXISTING_SV_SAMPLE_GUID = 'S000147_na21234' @@ -49,7 +49,7 @@ @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HAIL_HOST) -@mock.patch('seqr.views.utils.dataset_utils.random.randint', lambda *args: GUID_ID) +@mock.patch('seqr.models.random.randint', lambda *args: GUID_ID) @mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_URL', 'http://testairtable') @mock.patch('seqr.utils.search.add_data_utils.BASE_URL', SEQR_URL) @mock.patch('seqr.utils.search.add_data_utils.SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL', 'anvil-data-loading') @@ -381,7 +381,7 @@ def test_gcnv_command(self): 'sample_type': 'WES', 'family_samples': {'F000004_4': ['NA20872'], 'F000012_12': ['NA20889']}, } - self._test_success('GRCh37/GCNV', metadata, dataset_type='SV', sample_guids={f'S{GUID_ID}_NA20872', f'S{GUID_ID}_NA20889'}, reload_calls=[{ + self._test_success('GRCh37/GCNV', metadata, dataset_type='SV', sample_guids={f'S00000{GUID_ID}_na20872', f'S00000{GUID_ID}_na20889'}, reload_calls=[{ 'genome_version': 'GRCh37', 
'num_results': 1, 'variant_ids': [], 'variant_keys': ['prefix_19107_DEL'], 'sample_data': {'SV_WES': [{'individual_guid': 'I000017_na20889', 'family_guid': 'F000012_12', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_id': 'NA20889'}]}, }], reload_annotations_logs=['No additional saved variants to update']) From a62bc453ccd2de747b3c7b5ebf5290ef159fd1c1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 7 May 2024 15:26:21 -0400 Subject: [PATCH 098/736] better error handling when rna temp fiel moissing --- seqr/views/apis/data_manager_api.py | 6 +++++- seqr/views/apis/data_manager_api_tests.py | 14 +++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 9a7d9d4962..e91a88979c 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -320,7 +320,11 @@ def load_rna_seq_sample_data(request, sample_guid): config = RNA_DATA_TYPE_CONFIGS[data_type] data_rows = _load_saved_sample_data(file_name, sample_guid) - data_rows, error = post_process_rna_data(sample_guid, data_rows, **config.get('post_process_kwargs', {})) + if data_rows: + data_rows, error = post_process_rna_data(sample_guid, data_rows, **config.get('post_process_kwargs', {})) + else: + logger.error(f'No saved temp data found for {sample_guid} with file prefix {file_name}', request.user) + error = f'Data for this sample was not properly parsed. 
Please re-upload the data' if error: return create_json_response({'error': error}, status=400) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 4b7ddce463..85efc87aaf 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1099,7 +1099,6 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s @mock.patch('seqr.views.apis.data_manager_api.gzip.open') def test_load_rna_seq_sample_data(self, mock_open, mock_os): mock_os.path.join.side_effect = lambda *args: '/'.join(args[1:]) - mock_os.path.exists.return_value = True url = reverse(load_rna_seq_sample_data, args=[RNA_MUSCLE_SAMPLE_GUID]) self.check_pm_login(url) @@ -1110,6 +1109,7 @@ def test_load_rna_seq_sample_data(self, mock_open, mock_os): url = reverse(load_rna_seq_sample_data, args=[sample_guid]) model_cls = params['model_cls'] model_cls.objects.all().delete() + mock_os.path.exists.return_value = False self.reset_logs() parsed_file_lines = params['parsed_file_data'][sample_guid].strip().split('\n') mock_open.return_value.__enter__.return_value.readlines.return_value = parsed_file_lines @@ -1117,6 +1117,18 @@ def test_load_rna_seq_sample_data(self, mock_open, mock_os): body = {'fileName': file_name, 'dataType': data_type} response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), {'error': 'Data for this sample was not properly parsed. 
Please re-upload the data'}) + self.assert_json_logs(self.pm_user, [ + (f'Loading outlier data for {params["loaded_data_row"][0]}', None), + (f'No saved temp data found for {sample_guid} with file prefix {file_name}', { + 'severity': 'ERROR', '@type': 'type.googleapis.com/google.devtools.clouderrorreporting.v1beta1.ReportedErrorEvent', + }), + ]) + + mock_os.path.exists.return_value = True + self.reset_logs() + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), {'success': True}) From 2a52314718d3a7619f1bba1a8a2cb6a911a75354 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 7 May 2024 16:24:51 -0400 Subject: [PATCH 099/736] remove unneeded f string --- seqr/views/apis/data_manager_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index e91a88979c..0d835fc032 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -324,7 +324,7 @@ def load_rna_seq_sample_data(request, sample_guid): data_rows, error = post_process_rna_data(sample_guid, data_rows, **config.get('post_process_kwargs', {})) else: logger.error(f'No saved temp data found for {sample_guid} with file prefix {file_name}', request.user) - error = f'Data for this sample was not properly parsed. Please re-upload the data' + error = 'Data for this sample was not properly parsed. 
Please re-upload the data' if error: return create_json_response({'error': error}, status=400) From eb15e423ae077a90f2504a14a3c4249b421704ce Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Wed, 8 May 2024 15:12:20 +0000 Subject: [PATCH 100/736] fix: requirements.txt to reduce vulnerabilities The following vulnerabilities are fixed by pinning transitive dependencies: - https://snyk.io/vuln/SNYK-PYTHON-CRYPTOGRAPHY-6592767 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4cc64b5e1e..0268a31d33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ cffi==1.15.1 # via cryptography charset-normalizer==3.0.1 # via requests -cryptography==42.0.4 +cryptography==42.0.6 # via social-auth-core defusedxml==0.7.1 # via From ce766481fa379de8c94b208de3c8570731af2a73 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 May 2024 13:20:04 -0400 Subject: [PATCH 101/736] phenotype prioritization buylk create --- seqr/models.py | 3 +-- seqr/views/apis/data_manager_api.py | 10 +++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index b2da5db92b..5866c9c592 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1197,8 +1197,7 @@ class PhenotypePrioritization(ModelWithGUID): def __unicode__(self): return "%s:%s:%s" % (self.individual.individual_id, self.gene_id, self.disease_id) - def _compute_guid(self): - return 'PP%07d_%s' % (self.id, _slugify(str(self))) + GUID_PREFIX = 'PP' class Meta: json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'disease_name', 'scores'] diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 7d16e4d737..d3a87b1bb7 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -7,7 +7,6 @@ import re import requests import urllib3 -import random from django.contrib.postgres.aggregates import ArrayAgg from django.db.models import Max, F, Q @@ 
-416,12 +415,9 @@ def load_phenotype_prioritization_data(request): if to_delete: PhenotypePrioritization.bulk_delete(request.user, to_delete) - models_to_create = [] - for indiv_records in all_records_by_project_name.values(): - for record in indiv_records: - model = PhenotypePrioritization(**record) - model.guid = f'PP{random.randint(10 ** 8, 10 ** 9)}_{model.individual.individual_id}_{model.gene_id}_{model.disease_id}'[:PhenotypePrioritization.MAX_GUID_SIZE] # nosec - models_to_create.append(model) + models_to_create = [ + PhenotypePrioritization(**record) for records in all_records_by_project_name.values() for record in records + ] PhenotypePrioritization.bulk_create(request.user, models_to_create) for project_name, indiv_records in all_records_by_project_name.items(): From fa7e84a06810fe17f05ea3f0e59b9d9e3c9e5ebc Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 May 2024 13:22:09 -0400 Subject: [PATCH 102/736] bump changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5164fd92bc..59d0c755a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # _seqr_ Changes ## dev + +## 5/8/24 * Adds dynamic analysis groups (REQUIRES DB MIGRATION) ## 4/4/24 From 1b2473bd6d88080c2cfa77e6fbbc072a2f52efb2 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 May 2024 17:13:06 -0400 Subject: [PATCH 103/736] store rna temp data in gs --- seqr/views/apis/data_manager_api.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 0d835fc032..e2cf4720d0 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -15,7 +15,7 @@ from requests.exceptions import ConnectionError as RequestConnectionError from seqr.utils.search.utils import get_search_backend_status, delete_search_backend_data -from seqr.utils.file_utils import file_iter, does_file_exist +from 
seqr.utils.file_utils import file_iter, does_file_exist, mv_file_to_gs from seqr.utils.logging_utils import SeqrLogger from seqr.utils.vcf_utils import validate_vcf_exists @@ -34,6 +34,8 @@ logger = SeqrLogger(__name__) +TEMP_GS_BUCKET = 'gs://seqr-scratch-temp' + @data_manager_required def elasticsearch_status(request): @@ -273,12 +275,13 @@ def update_rna_seq(request): mapping_file = load_uploaded_file(uploaded_mapping_file_id) file_name_prefix = f'rna_sample_data__{data_type}__{datetime.now().isoformat()}' + file_dir = os.path.join(get_temp_upload_directory(), file_name_prefix) sample_files = {} def _save_sample_data(sample_guid, sample_data): if sample_guid not in sample_files: - file_name = os.path.join(get_temp_upload_directory(), _get_sample_file_name(file_name_prefix, sample_guid)) + file_name = os.path.join(file_dir, f'{sample_guid}.json.gz') sample_files[sample_guid] = gzip.open(file_name, 'at') sample_files[sample_guid].write(f'{json.dumps(sample_data)}\n') @@ -289,6 +292,8 @@ def _save_sample_data(sample_guid, sample_data): except ValueError as e: return create_json_response({'error': str(e)}, status=400) + mv_file_to_gs(f'{file_dir}/*', f'{TEMP_GS_BUCKET}/{file_name_prefix}', request.user) + return create_json_response({ 'info': info, 'warnings': warnings, @@ -297,15 +302,10 @@ def _save_sample_data(sample_guid, sample_data): }) -def _get_sample_file_name(file_name_prefix, sample_guid): - return f'{file_name_prefix}__{sample_guid}.json.gz' - - def _load_saved_sample_data(file_name_prefix, sample_guid): - file_name = os.path.join(get_temp_upload_directory(), _get_sample_file_name(file_name_prefix, sample_guid)) - if os.path.exists(file_name): - with gzip.open(file_name, 'rt') as f: - return [json.loads(line) for line in f.readlines()] + file_name = f'{TEMP_GS_BUCKET}/{file_name_prefix}/{sample_guid}.json.gz' + if does_file_exist(file_name, user=request.user): + return [json.loads(line) for line in file_iter(file_name, user=request.user)] return None 
From 0f0df9e1e8d989bb92b256700b4c9e12a81f6ac6 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 9 May 2024 10:38:37 -0400 Subject: [PATCH 104/736] fix family api payload for ui and get rid ANY in tests --- seqr/views/apis/data_manager_api_tests.py | 26 ++++++++++++------- seqr/views/apis/family_api.py | 8 +++--- .../components/FamilyTable/IndividualRow.jsx | 12 +++------ 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 10c770a713..35f1d0a295 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -3,10 +3,10 @@ from django.urls.base import reverse import json import mock -from mock.mock import ANY from requests import HTTPError import responses +from seqr.utils.communication_utils import _set_bulk_notification_stream from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, write_pedigree, validate_callset, \ get_loaded_projects, load_data @@ -1152,9 +1152,10 @@ def _join_data(cls, data): return ['\t'.join(line).encode('utf-8') for line in data] @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', SEQR_URL) + @mock.patch('seqr.views.apis.data_manager_api.random') @mock.patch('seqr.utils.communication_utils.send_html_email') @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_email): + def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_email, mock_random): url = reverse(load_phenotype_prioritization_data) self.check_data_manager_login(url) @@ -1208,6 +1209,7 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema mock_subprocess.reset_mock() mock_subprocess.return_value.stdout = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) 
self.reset_logs() + mock_random.randint.side_effect = [256989491, 295284416] response = self.client.post(url, content_type='application/json', data=json.dumps(request_body)) self.assertEqual(response.status_code, 200) info = [ @@ -1223,7 +1225,7 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema }}), ('create 2 PhenotypePrioritizations', {'dbUpdate': { 'dbEntity': 'PhenotypePrioritization', 'updateType': 'bulk_create', - "entityIds": [ANY, ANY], + "entityIds": ['PP256989491_NA19678_ENSG000001', 'PP295284416_NA20885_ENSG000001'], }}), ]) saved_data = _get_json_for_models(PhenotypePrioritization.objects.filter(tool='lirical').order_by('id'), @@ -1240,6 +1242,7 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema self.reset_logs() mock_send_email.reset_mock() mock_subprocess.return_value.stdout = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + UPDATE_LIRICAL_DATA) + mock_random.randint.side_effect = [177442291, 215071655] response = self.client.post(url, content_type='application/json', data=json.dumps(request_body)) self.assertEqual(response.status_code, 200) info = [ @@ -1250,11 +1253,11 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema self._has_expected_file_loading_logs('gs://seqr_data/lirical_data.tsv.gz', user=self.data_manager_user, additional_logs=[ ('delete 1 PhenotypePrioritizations', {'dbUpdate': { 'dbEntity': 'PhenotypePrioritization', 'updateType': 'bulk_delete', - 'entityIds': [ANY], + 'entityIds': ['PP256989491_NA19678_ENSG000001'], }}), ('create 2 PhenotypePrioritizations', {'dbUpdate': { 'dbEntity': 'PhenotypePrioritization', 'updateType': 'bulk_create', - 'entityIds': [ANY, ANY], + 'entityIds': ['PP177442291_NA19678_ENSG000001', 'PP215071655_NA19678_ENSG000001'], }}), ]) saved_data = _get_json_for_models(PhenotypePrioritization.objects.filter(tool='lirical'), @@ -1276,11 +1279,14 @@ def _assert_expected_notifications(mock_send_email, expected_notifs: 
list[dict]) f'This is to notify you that {notif_dict["tool"].title()} data for {notif_dict["num_samples"]} sample(s) ' f'has been loaded in seqr project {project_link}' ) - calls.append(mock.call( - email_body=f'Dear seqr user,\n\n{email}\n\nAll the best,\nThe seqr team', - subject=f'New {notif_dict["tool"].title()} data available in seqr', - to=['test_user_manager@test.com'], process_message=ANY, - )) + calls.append( + mock.call( + email_body=f'Dear seqr user,\n\n{email}\n\nAll the best,\nThe seqr team', + subject=f'New {notif_dict["tool"].title()} data available in seqr', + to=['test_user_manager@test.com'], + process_message=_set_bulk_notification_stream, + ) + ) mock_send_email.assert_has_calls(calls) @staticmethod diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index d2dd83c23f..3921a5b889 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -7,7 +7,7 @@ from django.contrib.postgres.aggregates import ArrayAgg from django.db.models import Count, Q from django.db.models.fields.files import ImageFieldFile -from django.db.models.functions import JSONObject +from django.db.models.functions import JSONObject, Concat, Upper, Substr from matchmaker.models import MatchmakerSubmission from reference_data.models import Omim @@ -85,8 +85,10 @@ def family_page_data(request, family_guid): tools_by_indiv = {} tools_agg = PhenotypePrioritization.objects.filter(individual__family=family).values('individual__guid').annotate( phenotypePrioritizationTools=ArrayAgg( - JSONObject(tool='tool', createdDate='created_date'), - distinct=True, + JSONObject( + sampleType=Concat(Upper(Substr('tool', 1, 1)), Substr('tool', 2)), + loadedDate='created_date'), + distinct=True )) for indiv_record in tools_agg: individual_guid = indiv_record.get('individual__guid') diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index 5e1e828597..4cb355b7c1 100644 --- 
a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -146,7 +146,7 @@ MmeStatusLabel.propTypes = { mmeSubmission: PropTypes.object, } -const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission, phenotypePrioritizationTools }) => ( +const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission }) => (
{loadedSamples.map( sample =>
, @@ -179,8 +179,8 @@ const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission, phen
)} - { phenotypePrioritizationTools.map( - pp =>
, + {individual.phenotypePrioritizationTools.map( + tool =>
, )} {SHOW_DATA_MODAL_CONFIG.filter(({ shouldShow }) => shouldShow(individual)).map( ({ modalName, title, modalSize, linkText, component }) => { @@ -209,7 +209,6 @@ DataDetails.propTypes = { mmeSubmission: PropTypes.object, individual: PropTypes.object, loadedSamples: PropTypes.arrayOf(PropTypes.object), - phenotypePrioritizationTools: PropTypes.arrayOf(PropTypes.object), } const formatGene = gene => `${gene.gene} ${gene.comments ? ` (${gene.comments.trim()})` : ''}` @@ -548,10 +547,6 @@ class IndividualRow extends React.PureComponent { // only show active or first/ last inactive samples loadedSamples = loadedSamples.filter((sample, i) => sample.isActive || i === 0 || i === loadedSamples.length - 1) - const phenotypePrioritizationTools = individual.phenotypePrioritizationTools.map( - pp => ({ sampleType: pp.tool.charAt(0).toUpperCase() + pp.tool.slice(1), loadedDate: pp.createdDate }), - ) - const leftContent = (
@@ -586,7 +581,6 @@ class IndividualRow extends React.PureComponent { loadedSamples={loadedSamples} individual={individual} mmeSubmission={mmeSubmission} - phenotypePrioritizationTools={phenotypePrioritizationTools} /> ) From 31a8137369c8fd6ce22e230684fe72c1304a0507 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 9 May 2024 12:34:49 -0400 Subject: [PATCH 105/736] fix test --- seqr/views/apis/family_api_tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index e5786b6544..42b3b9332c 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -75,10 +75,10 @@ def test_family_page_data(self): self.assertSetEqual(set(individual.keys()), individual_fields) self.assertListEqual([ [ - {'createdDate': '2024-05-02T06:42:55.397+00:00', 'tool': 'exomiser'}, - {'createdDate': '2024-05-02T06:42:55.397+00:00', 'tool': 'lirical'} + {'loadedDate': '2024-05-02T06:42:55.397+00:00', 'sampleType': 'Exomiser'}, + {'loadedDate': '2024-05-02T06:42:55.397+00:00', 'sampleType': 'Lirical'} ], [ - {'createdDate': '2024-05-02T06:42:55.397+00:00', 'tool': 'lirical'} + {'loadedDate': '2024-05-02T06:42:55.397+00:00', 'sampleType': 'Lirical'} ], [] ], [response_json['individualsByGuid'][guid].get('phenotypePrioritizationTools') for guid in INDIVIDUAL_GUIDS] From 5d3bcdd286afbc4c920cd6786ea2cfd883ddc38c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 9 May 2024 12:42:27 -0400 Subject: [PATCH 106/736] fix load data tests --- seqr/views/apis/data_manager_api.py | 12 +++------- seqr/views/apis/data_manager_api_tests.py | 29 ++++++++++++++++------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index e2cf4720d0..8158af15a3 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -302,13 +302,6 @@ def 
_save_sample_data(sample_guid, sample_data): }) -def _load_saved_sample_data(file_name_prefix, sample_guid): - file_name = f'{TEMP_GS_BUCKET}/{file_name_prefix}/{sample_guid}.json.gz' - if does_file_exist(file_name, user=request.user): - return [json.loads(line) for line in file_iter(file_name, user=request.user)] - return None - - @pm_or_data_manager_required def load_rna_seq_sample_data(request, sample_guid): sample = Sample.objects.get(guid=sample_guid) @@ -319,8 +312,9 @@ def load_rna_seq_sample_data(request, sample_guid): data_type = request_json['dataType'] config = RNA_DATA_TYPE_CONFIGS[data_type] - data_rows = _load_saved_sample_data(file_name, sample_guid) - if data_rows: + gs_file_name = f'{TEMP_GS_BUCKET}/{file_name}/{sample_guid}.json.gz' + if does_file_exist(gs_file_name, user=request.user): + data_rows = [json.loads(line) for line in file_iter(gs_file_name, user=request.user)] data_rows, error = post_process_rna_data(sample_guid, data_rows, **config.get('post_process_kwargs', {})) else: logger.error(f'No saved temp data found for {sample_guid} with file prefix {file_name}', request.user) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 85efc87aaf..25e054d03e 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1095,10 +1095,8 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s params['write_data'], ) - @mock.patch('seqr.views.apis.data_manager_api.os') - @mock.patch('seqr.views.apis.data_manager_api.gzip.open') - def test_load_rna_seq_sample_data(self, mock_open, mock_os): - mock_os.path.join.side_effect = lambda *args: '/'.join(args[1:]) + @mock.patch('seqr.utils.file_utils.subprocess.Popen') + def test_load_rna_seq_sample_data(self, mock_subprocess): url = reverse(load_rna_seq_sample_data, args=[RNA_MUSCLE_SAMPLE_GUID]) self.check_pm_login(url) @@ -1109,10 +1107,14 @@ def test_load_rna_seq_sample_data(self, 
mock_open, mock_os): url = reverse(load_rna_seq_sample_data, args=[sample_guid]) model_cls = params['model_cls'] model_cls.objects.all().delete() - mock_os.path.exists.return_value = False self.reset_logs() parsed_file_lines = params['parsed_file_data'][sample_guid].strip().split('\n') - mock_open.return_value.__enter__.return_value.readlines.return_value = parsed_file_lines + + mock_does_file_exist = mock.MagicMock() + mock_does_file_exist.wait.return_value = 1 + mock_does_file_exist.stdout = [b'CommandException: One or more URLs matched no objects'] + mock_subprocess.side_effect = [mock_does_file_exist] + file_name = RNA_FILENAME_TEMPLATE.format(data_type) body = {'fileName': file_name, 'dataType': data_type} @@ -1121,12 +1123,17 @@ def test_load_rna_seq_sample_data(self, mock_open, mock_os): self.assertDictEqual(response.json(), {'error': 'Data for this sample was not properly parsed. Please re-upload the data'}) self.assert_json_logs(self.pm_user, [ (f'Loading outlier data for {params["loaded_data_row"][0]}', None), + (f'==> gsutil ls gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz', None), + (f'CommandException: One or more URLs matched no objects', None), (f'No saved temp data found for {sample_guid} with file prefix {file_name}', { 'severity': 'ERROR', '@type': 'type.googleapis.com/google.devtools.clouderrorreporting.v1beta1.ReportedErrorEvent', }), ]) - mock_os.path.exists.return_value = True + mock_does_file_exist.wait.return_value = 0 + mock_file_iter = mock.MagicMock() + mock_file_iter.stdout = [row.encode('utf-8') for row in parsed_file_lines] + mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter] self.reset_logs() response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 200) @@ -1138,10 +1145,13 @@ def test_load_rna_seq_sample_data(self, mock_open, mock_os): self.assertSetEqual({model.sample.guid for model in models}, {sample_guid}) 
self.assertTrue(all(model.sample.is_active for model in models)) - mock_open.assert_called_with(f'{file_name}__{sample_guid}.json.gz', 'rt') + gsutil_cat = f'gsutil cat gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz | gunzip -c -q - ' + mock_subprocess.assert_called_with(gsutil_cat, stdout=-1, stderr=-2, shell=True) # nosec self.assert_json_logs(self.pm_user, [ (f'Loading outlier data for {params["loaded_data_row"][0]}', None), + (f'==> gsutil ls gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz', None), + (f'==> {gsutil_cat}', None), (f'create {model_cls.__name__}s', {'dbUpdate': { 'dbEntity': model_cls.__name__, 'numEntities': num_models, 'parentEntityIds': [sample_guid], 'updateType': 'bulk_create', @@ -1151,7 +1161,8 @@ def test_load_rna_seq_sample_data(self, mock_open, mock_os): self.assertListEqual(list(params['get_models_json'](models)), params['expected_models_json']) mismatch_row = {**json.loads(parsed_file_lines[0]), params.get('mismatch_field', 'p_value'): '0.05'} - mock_open.return_value.__enter__.return_value.readlines.return_value = parsed_file_lines + [json.dumps(mismatch_row)] + mock_file_iter.stdout += [json.dumps(mismatch_row).encode('utf-8')] + mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter] response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 400) self.assertDictEqual(response.json(), { From 564d06a4f3a58b58e7c1c345d818abab5d31c88b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 9 May 2024 13:31:10 -0400 Subject: [PATCH 107/736] fix unit tests --- seqr/views/apis/data_manager_api_tests.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 25e054d03e..dbf95ad468 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -878,14 +878,14 @@ def 
test_update_rna_splice_outlier(self, *args, **kwargs): @mock.patch('seqr.views.utils.dataset_utils.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.views.utils.dataset_utils.SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL', 'seqr-data-loading') + @mock.patch('seqr.views.apis.data_manager_api.get_temp_upload_directory', lambda: 'tmp/') @mock.patch('seqr.views.utils.dataset_utils.safe_post_to_slack') @mock.patch('seqr.views.apis.data_manager_api.datetime') - @mock.patch('seqr.views.apis.data_manager_api.os') @mock.patch('seqr.views.apis.data_manager_api.load_uploaded_file') @mock.patch('seqr.utils.file_utils.subprocess.Popen') @mock.patch('seqr.views.apis.data_manager_api.gzip.open') def _test_update_rna_seq(self, data_type, mock_open, mock_subprocess, mock_load_uploaded_file, - mock_os, mock_datetime, mock_send_slack): + mock_datetime, mock_send_slack): url = reverse(update_rna_seq) self.check_pm_login(url) @@ -897,8 +897,7 @@ def _test_update_rna_seq(self, data_type, mock_open, mock_subprocess, mock_load_ # Test errors body = {'dataType': data_type, 'file': 'gs://rna_data/muscle_samples.tsv'} mock_datetime.now.return_value = datetime(2020, 4, 15) - mock_os.path.join.side_effect = lambda *args: '/'.join(args[1:]) - mock_os.path.exists.return_value = False + mock_load_uploaded_file.return_value = [['a']] mock_load_uploaded_file.return_value = [['a']] mock_does_file_exist = mock.MagicMock() mock_does_file_exist.wait.return_value = 1 @@ -911,7 +910,7 @@ def _test_update_rna_seq(self, data_type, mock_open, mock_subprocess, mock_load_ mock_file_iter = mock.MagicMock() def _set_file_iter_stdout(rows): mock_file_iter.stdout = [('\t'.join([str(col) for col in row]) + '\n').encode() for row in rows] - mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter] + mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter, mock_does_file_exist] _set_file_iter_stdout([]) invalid_body = {**body, 'file': body['file'].replace('tsv', 'xlsx')} @@ -1004,6 +1003,7 
@@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s # Test loading new data mock_open.reset_mock() + mock_subprocess.reset_mock() self.reset_logs() mock_load_uploaded_file.return_value = [['NA19675_D2', 'NA19675_1']] mock_files = defaultdict(mock.MagicMock) @@ -1044,10 +1044,15 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s self.assertSetEqual(set(response_json['sampleGuids']), {sample_guid, new_sample_guid}) # test correct file interactions - mock_subprocess.assert_called_with(f'gsutil cat {RNA_FILE_ID} | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) # nosec - filename = RNA_FILENAME_TEMPLATE.format(data_type) + f'__{new_sample_guid}.json.gz' + file_path = RNA_FILENAME_TEMPLATE.format(data_type) + mock_subprocess.assert_has_calls([mock.call(command, stdout=-1, stderr=-2, shell=True) for command in [ # nosec + f'gsutil ls {RNA_FILE_ID}', + f'gsutil cat {RNA_FILE_ID} | gunzip -c -q - ', + f'gsutil mv tmp/{file_path}/* gs://seqr-scratch-temp/{file_path}', + ]]) + filename = f'tmp/{file_path}/{new_sample_guid}.json.gz' expected_files = { - f'{RNA_FILENAME_TEMPLATE.format(data_type)}__{new_sample_guid if sample_guid == PLACEHOLDER_GUID else sample_guid}.json.gz': data + f'tmp/{file_path}/{new_sample_guid if sample_guid == PLACEHOLDER_GUID else sample_guid}.json.gz': data for sample_guid, data in params['parsed_file_data'].items() } self.assertIn(filename, expected_files) @@ -1087,7 +1092,7 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s self.assertTrue(second_tissue_sample_guid != new_sample_guid) self.assertTrue(second_tissue_sample_guid in response_json['sampleGuids']) mock_open.assert_has_calls([ - mock.call(f'{RNA_FILENAME_TEMPLATE.format(data_type)}__{sample_guid}.json.gz', 'at') + mock.call(f'tmp/{RNA_FILENAME_TEMPLATE.format(data_type)}/{sample_guid}.json.gz', 'at') for sample_guid in response_json['sampleGuids'] ]) self.assertSetEqual( From 
837b456dd5772f291a75a246f8fdb4bc499fe045 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 9 May 2024 16:02:36 -0400 Subject: [PATCH 108/736] update gnomad links to v4 where possible --- ui/shared/components/panel/genes/GeneDetail.jsx | 4 ++-- ui/shared/components/panel/variants/Annotations.jsx | 11 ++++------- ui/shared/components/panel/variants/Frequencies.jsx | 9 ++++----- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/ui/shared/components/panel/genes/GeneDetail.jsx b/ui/shared/components/panel/genes/GeneDetail.jsx index 82470104f9..03ac3e06ec 100644 --- a/ui/shared/components/panel/genes/GeneDetail.jsx +++ b/ui/shared/components/panel/genes/GeneDetail.jsx @@ -334,8 +334,8 @@ const GeneDetailContent = React.memo(({ gene, user, updateGeneNote: dispatchUpda { title: 'Decipher', link: getDecipherGeneLink(gene), description: 'DatabasE of genomiC varIation and Phenotype in Humans using Ensembl Resources' }, { title: 'UniProt', link: `http://www.uniprot.org/uniprot?query=${gene.geneId}+AND(reviewed:true)+AND(organism_id:9606)`, description: 'Protein sequence and functional information' }, { title: 'Geno2MP', link: `https://geno2mp.gs.washington.edu/Geno2MP/#/gene/${gene.geneSymbol}/gene/0/0/0`, description: 'Genotype to Mendelian Phenotype' }, - { title: 'gnomAD', link: `https://gnomad.broadinstitute.org/gene/${gene.geneId}?dataset=gnomad_r3`, description: 'Genome Aggregation Database' }, - { title: 'primAD', link: `http://primad.basespace.illumina.com/gene/${gene.geneSymbol}?dataset=gnomad_r3`, description: 'Primate Genome Aggregation Database' }, + { title: 'gnomAD', link: `https://gnomad.broadinstitute.org/gene/${gene.geneId}?dataset=gnomad_r4`, description: 'Genome Aggregation Database' }, + { title: 'primAD', link: `http://primad.basespace.illumina.com/gene/${gene.geneSymbol}`, description: 'Primate Genome Aggregation Database' }, gene.mgiMarkerId ? 
{ title: 'MGI', link: `http://www.informatics.jax.org/marker/${gene.mgiMarkerId}`, description: 'Mouse Genome Informatics' } : null, gene.mgiMarkerId ? { title: 'IMPC', link: `https://www.mousephenotype.org/data/genes/${gene.mgiMarkerId}`, description: 'International Mouse Phenotyping Consortium' } : null, { title: 'KEGG', link: `https://www.kegg.jp/kegg-bin/search_pathway_text?keyword=${gene.geneSymbol}&viewImage=true`, description: 'Pathway maps representing known molecular interaction' }, diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 3be7b2e742..3af864824a 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -201,12 +201,9 @@ const LOF_FILTER_MAP = { '3UTR_SPLICE': { title: "3'UTR", message: 'Essential splice variant LoF occurs in the UTR of the transcript' }, } -const getSvRegion = ( - { chrom, endChrom, pos, end, liftedOverGenomeVersion, liftedOverPos }, divider, useLiftoverVersion, -) => { +const getSvRegion = ({ chrom, endChrom, pos, end }, divider) => { const endOffset = endChrom ? 0 : end - pos - const start = (useLiftoverVersion && liftedOverGenomeVersion === useLiftoverVersion) ? 
liftedOverPos : pos - return `${chrom}${divider}${start}-${start + endOffset}` + return `${chrom}${divider}${pos}-${pos + endOffset}` } const getGeneNames = genes => genes.reduce((acc, gene) => [gene.geneSymbol, ...getOtherGeneNames(gene), ...acc], []) @@ -237,8 +234,8 @@ const shouldShowNonDefaultTranscriptInfoIcon = (variant, transcript, transcripts const VARIANT_LINKS = [ { name: 'gnomAD', - shouldShow: variant => !!variant.svType && has37Coords(variant), - getHref: variant => `https://gnomad.broadinstitute.org/region/${getSvRegion(variant, '-', GENOME_VERSION_37)}?dataset=gnomad_sv_r2_1`, + shouldShow: variant => !!variant.svType, + getHref: variant => `https://gnomad.broadinstitute.org/region/${getSvRegion(variant, '-')}?dataset=gnomad_sv_r4`, }, { name: 'Decipher', diff --git a/ui/shared/components/panel/variants/Frequencies.jsx b/ui/shared/components/panel/variants/Frequencies.jsx index 50887dfc18..826deb136a 100644 --- a/ui/shared/components/panel/variants/Frequencies.jsx +++ b/ui/shared/components/panel/variants/Frequencies.jsx @@ -143,7 +143,7 @@ gnomadLink.propTypes = { const GNOMAD_URL_INFO = { urls: { [GENOME_VERSION_37]: 'gnomad.broadinstitute.org', [GENOME_VERSION_38]: 'gnomad.broadinstitute.org' }, - queryParams: { [GENOME_VERSION_38]: 'dataset=gnomad_r3' }, + queryParams: { [GENOME_VERSION_38]: 'dataset=gnomad_r3', [GENOME_VERSION_37]: 'dataset=gnomad_r2_1' }, // TODO } const sectionTitle = ({ fieldTitle, section }) => ( @@ -174,14 +174,13 @@ const POPULATIONS = [ }, { field: 'gnomad_exomes', - fieldTitle: 'gnomAD v2 exomes', + fieldTitle: 'gnomAD v2 exomes', // TODO titleContainer: gnomadLink, - urls: { [GENOME_VERSION_37]: 'gnomad.broadinstitute.org' }, - queryParams: { [GENOME_VERSION_37]: 'dataset=gnomad_r2_1' }, + ...GNOMAD_URL_INFO, }, { field: 'gnomad_genomes', - fieldTitle: 'gnomAD v3 genomes', + fieldTitle: 'gnomAD v3 genomes', // TODO titleContainer: gnomadLink, precision: 3, ...GNOMAD_URL_INFO, From 
d2611fc15684b4c304d2c37709656ac2b45d7edf Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 9 May 2024 16:35:41 -0400 Subject: [PATCH 109/736] conditionally link to correct gnomad for search backend --- .../components/panel/variants/Frequencies.jsx | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/ui/shared/components/panel/variants/Frequencies.jsx b/ui/shared/components/panel/variants/Frequencies.jsx index 826deb136a..bec0f84a02 100644 --- a/ui/shared/components/panel/variants/Frequencies.jsx +++ b/ui/shared/components/panel/variants/Frequencies.jsx @@ -57,7 +57,7 @@ const getFreqLinkPath = ({ chrom, pos, variant, value }) => { } const FreqSummary = React.memo((props) => { - const { field, fieldTitle, variant, urls, queryParams, acDisplay, titleContainer, precision = 2 } = props + const { field, fieldTitle, variant, urls, conditionalQueryParams, acDisplay, titleContainer, precision = 2 } = props const { populations = {}, chrom } = variant const population = populations[field] || {} if (population.af === null || population.af === undefined) { @@ -67,6 +67,11 @@ const FreqSummary = React.memo((props) => { const value = population.id ? population.id.replace('gnomAD-SV_v2.1_', '') : afValue const displayValue = population.filter_af > 0 ? population.filter_af.toPrecision(precision) : afValue + let { queryParams } = props + if (conditionalQueryParams) { + queryParams = conditionalQueryParams(populations) + } + return (
{titleContainer ? titleContainer(props) : fieldTitle} @@ -121,16 +126,18 @@ FreqSummary.propTypes = { titleContainer: PropTypes.func, urls: PropTypes.object, queryParams: PropTypes.object, + conditionalQueryParams: PropTypes.object, acDisplay: PropTypes.string, } const getGenePath = ({ variant }) => `gene/${getVariantMainGeneId(variant)}` -const gnomadLink = ({ fieldTitle, ...props }) => { - const [detail, ...linkName] = fieldTitle.split(' ').reverse() +const gnomadLink = ({ fieldTitle, esVersion, variant, ...props }) => { + const isEs = !(variant || {}).populations?.seqr + const [prefix, detail] = fieldTitle.split(' ') return ( - +   {detail} @@ -143,7 +150,7 @@ gnomadLink.propTypes = { const GNOMAD_URL_INFO = { urls: { [GENOME_VERSION_37]: 'gnomad.broadinstitute.org', [GENOME_VERSION_38]: 'gnomad.broadinstitute.org' }, - queryParams: { [GENOME_VERSION_38]: 'dataset=gnomad_r3', [GENOME_VERSION_37]: 'dataset=gnomad_r2_1' }, // TODO + queryParams: { [GENOME_VERSION_38]: 'dataset=gnomad_r4', [GENOME_VERSION_37]: 'dataset=gnomad_r2_1' }, } const sectionTitle = ({ fieldTitle, section }) => ( @@ -174,14 +181,18 @@ const POPULATIONS = [ }, { field: 'gnomad_exomes', - fieldTitle: 'gnomAD v2 exomes', // TODO + fieldTitle: 'gnomAD exomes', titleContainer: gnomadLink, + esVersion: 'v2', + conditionalQueryParams: populations => (populations.seqr ? GNOMAD_URL_INFO.queryParams : { [GENOME_VERSION_37]: 'dataset=gnomad_r2_1' }), ...GNOMAD_URL_INFO, }, { field: 'gnomad_genomes', - fieldTitle: 'gnomAD v3 genomes', // TODO + fieldTitle: 'gnomAD genomes', titleContainer: gnomadLink, + esVersion: 'v3', + conditionalQueryParams: populations => (populations.seqr ? 
GNOMAD_URL_INFO.queryParams : { [GENOME_VERSION_38]: 'dataset=gnomad_r3' }), precision: 3, ...GNOMAD_URL_INFO, }, From 84c5c3596b24bdf568c9407106547a613e712327 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 9 May 2024 16:37:36 -0400 Subject: [PATCH 110/736] codacy fix --- seqr/views/apis/data_manager_api_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index dbf95ad468..002a12f30f 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1129,7 +1129,7 @@ def test_load_rna_seq_sample_data(self, mock_subprocess): self.assert_json_logs(self.pm_user, [ (f'Loading outlier data for {params["loaded_data_row"][0]}', None), (f'==> gsutil ls gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz', None), - (f'CommandException: One or more URLs matched no objects', None), + ('CommandException: One or more URLs matched no objects', None), (f'No saved temp data found for {sample_guid} with file prefix {file_name}', { 'severity': 'ERROR', '@type': 'type.googleapis.com/google.devtools.clouderrorreporting.v1beta1.ReportedErrorEvent', }), From fec7671a739b88dffdd73d4f66e337b943baecc4 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 9 May 2024 17:02:14 -0400 Subject: [PATCH 111/736] allow images from github discussions for feature updates page --- settings.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/settings.py b/settings.py index b061521b87..d5e5f448ad 100644 --- a/settings.py +++ b/settings.py @@ -2,7 +2,7 @@ import os import random import string -import subprocess # nosec +import subprocess # nosec from ssl import create_default_context @@ -71,7 +71,7 @@ CSRF_COOKIE_NAME = 'csrf_token' CSRF_COOKIE_HTTPONLY = False -SESSION_COOKIE_AGE = 86400 # seconds in 1 day +SESSION_COOKIE_AGE = 86400 # seconds in 1 day X_FRAME_OPTIONS = 'SAMEORIGIN' 
SECURE_BROWSER_XSS_FILTER = True @@ -81,7 +81,9 @@ 'https://storage.googleapis.com', # google storage used by IGV 'https://reg.genome.network') CSP_SCRIPT_SRC = ("'self'", "'unsafe-eval'", 'https://www.googletagmanager.com') -CSP_IMG_SRC = ("'self'", 'https://www.google-analytics.com', 'https://storage.googleapis.com', 'data:') +CSP_IMG_SRC = ("'self'", 'https://www.google-analytics.com', 'https://storage.googleapis.com', + 'https://user-images.githubusercontent.com', # for images in GitHub discussions on Feature Updates page + 'data:') CSP_OBJECT_SRC = ("'none'") CSP_BASE_URI = ("'none'") # IGV js injects CSS into the page head so there is no way to set nonce. Therefore, support hashed value of the CSS @@ -280,7 +282,8 @@ 'context_processors': [ 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', # required for admin template - 'django.template.context_processors.request', # must be enabled in DjangoTemplates (TEMPLATES) in order to use the admin navigation sidebar + 'django.template.context_processors.request', + # must be enabled in DjangoTemplates (TEMPLATES) in order to use the admin navigation sidebar 'social_django.context_processors.backends', # required for social_auth, same for below 'social_django.context_processors.login_redirect', ], @@ -304,7 +307,8 @@ AIRTABLE_URL = 'https://api.airtable.com/v0' AIRTABLE_API_KEY = os.environ.get("AIRTABLE_API_KEY") -GREGOR_DATA_MODEL_URL = os.environ.get('GREGOR_DATA_MODEL_URL', 'https://raw.githubusercontent.com/UW-GAC/gregor_data_models/main/GREGoR_data_model.json') +GREGOR_DATA_MODEL_URL = os.environ.get('GREGOR_DATA_MODEL_URL', + 'https://raw.githubusercontent.com/UW-GAC/gregor_data_models/main/GREGoR_data_model.json') API_LOGIN_REQUIRED_URL = '/api/login-required-error' API_POLICY_REQUIRED_URL = '/api/policy-required-error' @@ -383,7 +387,6 @@ SOCIAL_AUTH_PROVIDER = 'google-oauth2' GOOGLE_LOGIN_REQUIRED_URL = '/login/{}'.format(SOCIAL_AUTH_PROVIDER) - # Use 
Google sub ID as the user ID, safer than using email USE_UNIQUE_USER_ID = True @@ -391,7 +394,6 @@ SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET = os.environ.get('SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET') LOGIN_URL = GOOGLE_LOGIN_REQUIRED_URL if SOCIAL_AUTH_GOOGLE_OAUTH2_KEY else '/login' - SOCIAL_AUTH_JSONFIELD_ENABLED = True SOCIAL_AUTH_URL_NAMESPACE = 'social' SOCIAL_AUTH_LOGIN_REDIRECT_URL = '/' @@ -418,7 +420,7 @@ if TERRA_API_ROOT_URL: try: - # Refresh pattern taken from: https://stackoverflow.com/a/74377391 + # Refresh pattern taken from: https://stackoverflow.com/a/74377391 SERVICE_ACCOUNT_CREDENTIALS, project_id = google.auth.default(scopes=SOCIAL_AUTH_GOOGLE_OAUTH2_SCOPE) request = google.auth.transport.requests.Request() SERVICE_ACCOUNT_CREDENTIALS.refresh(request=request) @@ -428,13 +430,14 @@ # activate command line account if failed on start up activated_service_account = subprocess.run(['gcloud auth list --filter=status:ACTIVE --format="value(account)"'], - capture_output=True, text=True, shell=True).stdout.split('\n')[0] # nosec + capture_output=True, text=True, shell=True).stdout.split('\n')[ + 0] # nosec if activated_service_account != SERVICE_ACCOUNT_FOR_ANVIL: raise Exception('Error starting seqr - attempt to authenticate gcloud cli failed') SOCIAL_AUTH_GOOGLE_OAUTH2_AUTH_EXTRA_ARGUMENTS = { 'access_type': 'offline', # to make the access_token can be refreshed after expired (expiration time is 1 hour) - 'approval_prompt': 'auto', # required for successful token refresh + 'approval_prompt': 'auto', # required for successful token refresh } SOCIAL_AUTH_PIPELINE = ('seqr.utils.social_auth_pipeline.validate_anvil_registration',) + \ From cbd3d379df22802bea609c85f2461b9ce0173192 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 9 May 2024 17:05:03 -0400 Subject: [PATCH 112/736] undo the extra changes --- settings.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/settings.py b/settings.py index d5e5f448ad..b7fd626bfa 100644 
--- a/settings.py +++ b/settings.py @@ -2,7 +2,7 @@ import os import random import string -import subprocess # nosec +import subprocess # nosec from ssl import create_default_context @@ -71,7 +71,7 @@ CSRF_COOKIE_NAME = 'csrf_token' CSRF_COOKIE_HTTPONLY = False -SESSION_COOKIE_AGE = 86400 # seconds in 1 day +SESSION_COOKIE_AGE = 86400 # seconds in 1 day X_FRAME_OPTIONS = 'SAMEORIGIN' SECURE_BROWSER_XSS_FILTER = True @@ -282,8 +282,7 @@ 'context_processors': [ 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', # required for admin template - 'django.template.context_processors.request', - # must be enabled in DjangoTemplates (TEMPLATES) in order to use the admin navigation sidebar + 'django.template.context_processors.request', # must be enabled in DjangoTemplates (TEMPLATES) in order to use the admin navigation sidebar 'social_django.context_processors.backends', # required for social_auth, same for below 'social_django.context_processors.login_redirect', ], @@ -307,8 +306,7 @@ AIRTABLE_URL = 'https://api.airtable.com/v0' AIRTABLE_API_KEY = os.environ.get("AIRTABLE_API_KEY") -GREGOR_DATA_MODEL_URL = os.environ.get('GREGOR_DATA_MODEL_URL', - 'https://raw.githubusercontent.com/UW-GAC/gregor_data_models/main/GREGoR_data_model.json') +GREGOR_DATA_MODEL_URL = os.environ.get('GREGOR_DATA_MODEL_URL', 'https://raw.githubusercontent.com/UW-GAC/gregor_data_models/main/GREGoR_data_model.json') API_LOGIN_REQUIRED_URL = '/api/login-required-error' API_POLICY_REQUIRED_URL = '/api/policy-required-error' @@ -387,6 +385,7 @@ SOCIAL_AUTH_PROVIDER = 'google-oauth2' GOOGLE_LOGIN_REQUIRED_URL = '/login/{}'.format(SOCIAL_AUTH_PROVIDER) + # Use Google sub ID as the user ID, safer than using email USE_UNIQUE_USER_ID = True @@ -394,6 +393,7 @@ SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET = os.environ.get('SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET') LOGIN_URL = GOOGLE_LOGIN_REQUIRED_URL if SOCIAL_AUTH_GOOGLE_OAUTH2_KEY else '/login' + 
SOCIAL_AUTH_JSONFIELD_ENABLED = True SOCIAL_AUTH_URL_NAMESPACE = 'social' SOCIAL_AUTH_LOGIN_REDIRECT_URL = '/' @@ -420,7 +420,7 @@ if TERRA_API_ROOT_URL: try: - # Refresh pattern taken from: https://stackoverflow.com/a/74377391 + # Refresh pattern taken from: https://stackoverflow.com/a/74377391 SERVICE_ACCOUNT_CREDENTIALS, project_id = google.auth.default(scopes=SOCIAL_AUTH_GOOGLE_OAUTH2_SCOPE) request = google.auth.transport.requests.Request() SERVICE_ACCOUNT_CREDENTIALS.refresh(request=request) @@ -430,14 +430,13 @@ # activate command line account if failed on start up activated_service_account = subprocess.run(['gcloud auth list --filter=status:ACTIVE --format="value(account)"'], - capture_output=True, text=True, shell=True).stdout.split('\n')[ - 0] # nosec + capture_output=True, text=True, shell=True).stdout.split('\n')[0] # nosec if activated_service_account != SERVICE_ACCOUNT_FOR_ANVIL: raise Exception('Error starting seqr - attempt to authenticate gcloud cli failed') SOCIAL_AUTH_GOOGLE_OAUTH2_AUTH_EXTRA_ARGUMENTS = { 'access_type': 'offline', # to make the access_token can be refreshed after expired (expiration time is 1 hour) - 'approval_prompt': 'auto', # required for successful token refresh + 'approval_prompt': 'auto', # required for successful token refresh } SOCIAL_AUTH_PIPELINE = ('seqr.utils.social_auth_pipeline.validate_anvil_registration',) + \ From 1f592bb3ea4801f310f93fa878722aece587af9b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 10 May 2024 15:24:01 -0400 Subject: [PATCH 113/736] add validation for comp het pairs in same gene in link variants UI --- ui/pages/Project/components/SavedVariants.jsx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ui/pages/Project/components/SavedVariants.jsx b/ui/pages/Project/components/SavedVariants.jsx index 37748641a7..98c15210b1 100644 --- a/ui/pages/Project/components/SavedVariants.jsx +++ b/ui/pages/Project/components/SavedVariants.jsx @@ -91,7 +91,19 @@ const 
LINK_VARIANT_FIELDS = [ VARIANT_POS_COLUMN, TAG_COLUMN, ], - validate: value => (Object.keys(value || {}).length > 1 ? undefined : 'Multiple variants required'), + includeSelectedRowData: true, + validate: (value) => { + const variants = Object.values(value || {}).filter(v => v) + if (variants.length < 2) { + return 'Multiple variants required' + } + if (variants.length === 2 && + Object.keys(variants[0].transcripts).every(geneId => !variants[1].transcripts[geneId]) + ) { + return 'Compound het pairs must be in the same gene' + } + return undefined + }, }, ] From ec5a004e443f38b7e2268c9d329d3315baa0fcc3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 10 May 2024 16:01:08 -0400 Subject: [PATCH 114/736] load all phenotpye priorities after viewing variant page --- .../Project/components/PhenotypePrioritizedGenes.jsx | 1 + ui/pages/Project/reducers.js | 11 ++++++++++- ui/shared/components/panel/variants/VariantGene.jsx | 5 +++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx b/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx index 6eeae095a0..a193fc2b94 100644 --- a/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx +++ b/ui/pages/Project/components/PhenotypePrioritizedGenes.jsx @@ -24,6 +24,7 @@ const PHENOTYPE_GENE_INFO_COLUMNS = [ compact showInlineDetails noExpand + hideLocusLists /> ), }, diff --git a/ui/pages/Project/reducers.js b/ui/pages/Project/reducers.js index 52785cb6f7..0a54ca0569 100644 --- a/ui/pages/Project/reducers.js +++ b/ui/pages/Project/reducers.js @@ -330,7 +330,16 @@ export const loadRnaSeqData = individualGuid => (dispatch, getState) => { export const loadPhenotypeGeneScores = individualGuid => (dispatch, getState) => { const state = getState() const { familyGuid } = state.individualsByGuid[individualGuid] - if (!state.phenotypeGeneScoresByIndividual[individualGuid]) { + const loadedToolCounts = 
Object.values(state.phenotypeGeneScoresByIndividual[individualGuid] || {}).reduce( + (acc, dataByTool) => ( + Object.entries(dataByTool).reduce((acc2, [tool, data]) => ({ + ...acc2, + [tool]: (acc2[tool] || 0) + data.length, + }), acc) + ), {}, + ) + // Data can be loaded for only a subset of genes if previously loaded variant information + if (!Object.values(loadedToolCounts).some(val => val >= 10)) { dispatch({ type: REQUEST_PHENOTYPE_GENE_SCORES }) new HttpRequestHelper(`/api/family/${familyGuid}/phenotype_gene_scores`, (responseJson) => { diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index e1eb18f9c9..6cb36c9a76 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -579,7 +579,7 @@ const getGeneConsequence = (geneId, variant) => { export const BaseVariantGene = React.memo(({ geneId, gene, variant, compact, showInlineDetails, compoundHetToggle, tpmGenes, individualGeneData, geneModalId, - noExpand, geneSearchFamily, + noExpand, geneSearchFamily, hideLocusLists, }) => { const geneConsequence = variant && getGeneConsequence(geneId, variant) @@ -598,7 +598,7 @@ export const BaseVariantGene = React.memo(({ margin={showInlineDetails ? 
'1em .5em 0px 0px' : null} horizontal={showInlineDetails} individualGeneData={individualGeneData} - showLocusLists + showLocusLists={!hideLocusLists} /> ) @@ -684,6 +684,7 @@ BaseVariantGene.propTypes = { geneModalId: PropTypes.string, noExpand: PropTypes.bool, geneSearchFamily: PropTypes.string, + hideLocusLists: PropTypes.bool, ...RNA_SEQ_PROP_TYPES, } From 4b0d7c2588ed6db6efd11a6b9825365401dbbf18 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 13 May 2024 11:17:47 -0400 Subject: [PATCH 115/736] fix edit dynamic analysis group --- seqr/views/apis/analysis_group_api.py | 2 +- seqr/views/apis/analysis_group_api_tests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index 8147158321..53ee833f45 100644 --- a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -28,7 +28,7 @@ def _update_analysis_group(request, project_guid, analysis_group_guid, model_cls if analysis_group_guid: analysis_group = model_cls.objects.get(guid=analysis_group_guid, project=project) - update_model_from_json(analysis_group, request_json, user=request.user) + update_model_from_json(analysis_group, request_json, user=request.user, allow_unknown_keys=True) else: analysis_group, created = get_or_create_model_from_json(model_cls, { 'project': project, diff --git a/seqr/views/apis/analysis_group_api_tests.py b/seqr/views/apis/analysis_group_api_tests.py index 385bd11ac9..bcae366b90 100644 --- a/seqr/views/apis/analysis_group_api_tests.py +++ b/seqr/views/apis/analysis_group_api_tests.py @@ -111,7 +111,7 @@ def test_create_update_and_delete_dynamic_analysis_group(self): # update the analysis_group update_analysis_group_url = reverse(update_dynamic_analysis_group_handler, args=[PROJECT_GUID, guid]) response = self.client.post(update_analysis_group_url, content_type='application/json', data=json.dumps( - {'name': 'updated_analysis_group', 'criteria': 
{'analysisStatus': ['I']}})) + {**new_analysis_group, 'name': 'updated_analysis_group', 'criteria': {'analysisStatus': ['I']}})) self.assertEqual(response.status_code, 200) updated_analysis_group_response = response.json() From 3a7fe4479ebbc6f18c6ed8856853f95d0fc0f7cf Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 13 May 2024 11:22:16 -0400 Subject: [PATCH 116/736] add dynmic analysis group to django admin --- seqr/admin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/admin.py b/seqr/admin.py index 0ec3b58c9d..2ac70cc8e6 100644 --- a/seqr/admin.py +++ b/seqr/admin.py @@ -3,13 +3,13 @@ from matchmaker.models import MatchmakerSubmission, MatchmakerIncomingQuery, MatchmakerResult, MatchmakerContactNotes from seqr.models import Project, Family, Individual, Sample, LocusList, LocusListGene, LocusListInterval, VariantNote, \ VariantTag, VariantTagType, VariantFunctionalData, SavedVariant, GeneNote, AnalysisGroup, ProjectCategory, \ - FamilyAnalysedBy, VariantSearch, VariantSearchResults, IgvSample, UserPolicy, WarningMessage, FamilyNote + FamilyAnalysedBy, VariantSearch, VariantSearchResults, IgvSample, UserPolicy, WarningMessage, FamilyNote, DynamicAnalysisGroup for model_class in [ Project, Family, Individual, Sample, IgvSample, LocusList, LocusListGene, LocusListInterval, VariantNote, VariantTag, VariantTagType, VariantFunctionalData, SavedVariant, GeneNote, AnalysisGroup, ProjectCategory, FamilyAnalysedBy, VariantSearch, VariantSearchResults, MatchmakerSubmission, MatchmakerIncomingQuery, MatchmakerResult, - MatchmakerContactNotes, FamilyNote, + MatchmakerContactNotes, FamilyNote, DynamicAnalysisGroup, ]: @admin.register(model_class) From 21ba5df95818bd9bb277d9b8d9fa80d576c5440f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 13 May 2024 14:08:59 -0400 Subject: [PATCH 117/736] clearer constant name and comment --- ui/pages/Project/reducers.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/ui/pages/Project/reducers.js b/ui/pages/Project/reducers.js index 0a54ca0569..4bba0918ca 100644 --- a/ui/pages/Project/reducers.js +++ b/ui/pages/Project/reducers.js @@ -327,6 +327,8 @@ export const loadRnaSeqData = individualGuid => (dispatch, getState) => { } } +const MAX_EXPECTED_PHENOTYPE_PRIORITY_RANK = 10 + export const loadPhenotypeGeneScores = individualGuid => (dispatch, getState) => { const state = getState() const { familyGuid } = state.individualsByGuid[individualGuid] @@ -339,7 +341,8 @@ export const loadPhenotypeGeneScores = individualGuid => (dispatch, getState) => ), {}, ) // Data can be loaded for only a subset of genes if previously loaded variant information - if (!Object.values(loadedToolCounts).some(val => val >= 10)) { + // The top 10 genes are expected to be loaded per tool, so load data if fewer than that are available + if (!Object.values(loadedToolCounts).some(val => val >= MAX_EXPECTED_PHENOTYPE_PRIORITY_RANK)) { dispatch({ type: REQUEST_PHENOTYPE_GENE_SCORES }) new HttpRequestHelper(`/api/family/${familyGuid}/phenotype_gene_scores`, (responseJson) => { From 85208de68a0343a2e43e96af9972dc53ef497a71 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 14 May 2024 11:59:31 -0400 Subject: [PATCH 118/736] update changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 101bad16f2..06dc6927d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,11 @@ ## dev +## 5/14/24 +* Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION) + ## 5/8/24 * Adds dynamic analysis groups (REQUIRES DB MIGRATION) -* Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION) ## 4/4/24 * Add ability to import project metadata from gregor metadata From 43afc1dd726edb54d27f7150c105fcfc286a9b30 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 15 May 2024 11:36:46 -0400 Subject: [PATCH 119/736] do not show rna page link for 
inactive samples --- seqr/views/apis/family_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 028791da7f..a40683b0f8 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -75,7 +75,7 @@ def family_page_data(request, family_guid): 'postDiscoveryOmimOptions': omim_map, }) - outlier_individual_guids = sample_models.filter(sample_type=Sample.SAMPLE_TYPE_RNA)\ + outlier_individual_guids = sample_models.filter(sample_type=Sample.SAMPLE_TYPE_RNA, is_active=True)\ .exclude(rnaseqoutlier__isnull=True, rnaseqspliceoutlier__isnull=True).values_list('individual__guid', flat=True) for individual_guid in outlier_individual_guids: response['individualsByGuid'][individual_guid]['hasRnaOutlierData'] = True From 86e24bab559ebf8af4b32c87071081a990f3be55 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 15 May 2024 11:50:46 -0400 Subject: [PATCH 120/736] Update CHANGELOG.md --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06dc6927d7..a07e577172 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,6 @@ # _seqr_ Changes ## dev - -## 5/14/24 * Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION) ## 5/8/24 From c46403aaed3b9bd857a4fe5bff9db67b15104adf Mon Sep 17 00:00:00 2001 From: hanars Date: Wed, 15 May 2024 11:56:42 -0400 Subject: [PATCH 121/736] Revert "Make metadata reports available to local users" --- seqr/urls.py | 8 +- seqr/views/apis/report_api.py | 128 ++++++++ seqr/views/apis/report_api_tests.py | 258 +++++++++++++++- seqr/views/apis/summary_data_api.py | 124 +------- seqr/views/apis/summary_data_api_tests.py | 281 +----------------- ui/pages/Report/Report.jsx | 4 + .../components/FamilyMetadata.jsx | 8 +- .../components/VariantMetadata.jsx | 11 +- ui/pages/SummaryData/SummaryData.jsx | 4 - .../components/IndividualMetadata.jsx | 37 ++- 
.../components/table}/LoadReportTable.jsx | 46 +-- 11 files changed, 453 insertions(+), 456 deletions(-) rename ui/pages/{SummaryData => Report}/components/FamilyMetadata.jsx (80%) rename ui/pages/{SummaryData => Report}/components/VariantMetadata.jsx (77%) rename ui/{pages/SummaryData/components => shared/components/table}/LoadReportTable.jsx (58%) diff --git a/seqr/urls.py b/seqr/urls.py index 8882ca41ba..82e6b91763 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -124,10 +124,12 @@ validate_callset, get_loaded_projects, load_data from seqr.views.apis.report_api import \ anvil_export, \ + family_metadata, \ + variant_metadata, \ gregor_export, \ seqr_stats from seqr.views.apis.summary_data_api import success_story, saved_variants_page, mme_details, hpo_summary_data, \ - bulk_update_family_external_analysis, individual_metadata, family_metadata, variant_metadata + bulk_update_family_external_analysis, individual_metadata from seqr.views.apis.superuser_api import get_all_users from seqr.views.apis.awesomebar_api import awesomebar_autocomplete_handler @@ -318,6 +320,8 @@ 'upload_temp_file': save_temp_file, 'report/anvil/(?P[^/]+)': anvil_export, + 'report/family_metadata/(?P[^/]+)': family_metadata, + 'report/variant_metadata/(?P[^/]+)': variant_metadata, 'report/gregor': gregor_export, 'report/seqr_stats': seqr_stats, @@ -340,8 +344,6 @@ 'summary_data/matchmaker': mme_details, 'summary_data/update_external_analysis': bulk_update_family_external_analysis, 'summary_data/individual_metadata/(?P[^/]+)': individual_metadata, - 'summary_data/family_metadata/(?P[^/]+)': family_metadata, - 'summary_data/variant_metadata/(?P[^/]+)': variant_metadata, 'create_project_from_workspace/(?P[^/]+)/(?P[^/]+)/grant_access': grant_workspace_access, 'create_project_from_workspace/(?P[^/]+)/(?P[^/]+)/validate_vcf': validate_anvil_vcf, diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 6155c4d1d1..8b05c31863 100644 --- a/seqr/views/apis/report_api.py +++ 
b/seqr/views/apis/report_api.py @@ -20,6 +20,7 @@ from seqr.views.utils.permissions_utils import analyst_required, get_project_and_check_permissions, \ get_project_guids_user_can_view, get_internal_projects from seqr.views.utils.terra_api_utils import anvil_enabled +from seqr.views.utils.variant_utils import DISCOVERY_CATEGORY from seqr.models import Project, Family, Sample, Individual from settings import GREGOR_DATA_MODEL_URL @@ -790,3 +791,130 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e def _get_row_id(row): id_col = next(col for col in ['genetic_findings_id', 'participant_id', 'experiment_sample_id', 'family_id'] if col in row) return row[id_col] + + +@analyst_required +def family_metadata(request, project_guid): + projects = _get_metadata_projects(project_guid, request.user) + + families_by_id = {} + family_individuals = defaultdict(dict) + + def _add_row(row, family_id, row_type): + if row_type == FAMILY_ROW_TYPE: + families_by_id[family_id] = row + elif row_type == SUBJECT_ROW_TYPE: + family_individuals[family_id][row['participant_id']] = row + elif row_type == SAMPLE_ROW_TYPE: + family_individuals[family_id][row['participant_id']].update(row) + elif row_type == DISCOVERY_ROW_TYPE: + family = families_by_id[family_id] + if 'inheritance_models' not in family: + family.update({'genes': set(), 'inheritance_models': set()}) + family['genes'].update({v.get('gene') or v.get('sv_name') or v.get('gene_id') or '' for v in row}) + family['inheritance_models'].update({v['variant_inheritance'] for v in row}) + + parse_anvil_metadata( + projects, user=request.user, add_row=_add_row, omit_airtable=True, include_metadata=True, include_no_individual_families=True) + + for family_id, f in families_by_id.items(): + individuals_by_id = family_individuals[family_id] + proband = next((i for i in individuals_by_id.values() if i['proband_relationship'] == 'Self'), None) + individuals_ids = set(individuals_by_id.keys()) + known_ids = {} + if 
proband: + known_ids = { + 'proband_id': proband['participant_id'], + 'paternal_id': proband['paternal_id'], + 'maternal_id': proband['maternal_id'], + } + f.update(known_ids) + individuals_ids -= set(known_ids.values()) + + sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) + earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) + + inheritance_models = f.pop('inheritance_models', []) + f.update({ + 'individual_count': len(individuals_by_id), + 'other_individual_ids': '; '.join(sorted(individuals_ids)), + 'family_structure': _get_family_structure(len(individuals_by_id), sum(1 for id in known_ids.values() if id)), + 'data_type': earliest_sample.get('data_type'), + 'date_data_generation': earliest_sample.get('date_data_generation'), + 'genes': '; '.join(sorted(f.get('genes', []))), + 'actual_inheritance': 'unknown' if inheritance_models == {'unknown'} else ';'.join( + sorted([i for i in inheritance_models if i != 'unknown'])), + }) + + return create_json_response({'rows': list(families_by_id.values())}) + + +def _get_metadata_projects(project_guid, user): + if project_guid == 'all': + return get_internal_projects() + if project_guid == GREGOR_CATEGORY.lower(): + return Project.objects.filter(projectcategory__name=GREGOR_CATEGORY) + return [get_project_and_check_permissions(project_guid, user)] + + +FAMILY_STRUCTURES = { + 1: 'singleton', + 2: 'duo', + 3: 'trio', + 4: 'quad', +} + + +def _get_family_structure(num_individuals, num_known_individuals): + if (num_individuals and num_known_individuals == num_individuals) or ( + num_known_individuals in {0, 3} and num_individuals == num_known_individuals + 1): + return FAMILY_STRUCTURES[num_individuals] + return 'other' + + +@analyst_required +def variant_metadata(request, project_guid): + projects = _get_metadata_projects(project_guid, request.user) + + individuals = Individual.objects.filter( + 
family__project__in=projects, family__savedvariant__varianttag__variant_tag_type__category=DISCOVERY_CATEGORY, + ).distinct().annotate( + data_types=ArrayAgg('sample__sample_type', distinct=True, filter=Q(sample__isnull=False)) + ) + + families_by_id = {} + participant_mme = {} + variant_rows = [] + + def _add_row(row, family_id, row_type): + if row_type == FAMILY_ROW_TYPE: + families_by_id[family_id] = row + elif row_type == SUBJECT_ROW_TYPE: + participant_mme[row['participant_id']] = row.get('MME', {}) + elif row_type == DISCOVERY_ROW_TYPE: + family = families_by_id[family_id] + for variant in row: + del variant['gene_ids'] + variant_rows.append({ + 'MME': variant.pop('variantId') in participant_mme[variant['participant_id']].get('variant_ids', []), + 'phenotype_contribution': 'Full', + **family, + **variant, + }) + + parse_anvil_metadata( + projects, + user=request.user, + individual_samples={i: None for i in individuals}, + individual_data_types={i.individual_id: i.data_types for i in individuals}, + add_row=_add_row, + variant_json_fields=['clinvar', 'variantId'], + mme_values={'variant_ids': ArrayAgg('matchmakersubmissiongenes__saved_variant__saved_variant_json__variantId')}, + include_metadata=True, + include_mondo=True, + omit_airtable=True, + proband_only_variants=True, + include_parent_mnvs=True, + ) + + return create_json_response({'rows': variant_rows}) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 2320228029..b840029e1e 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -5,7 +5,7 @@ from settings import AIRTABLE_URL from seqr.models import Project, SavedVariant -from seqr.views.apis.report_api import seqr_stats, anvil_export, gregor_export +from seqr.views.apis.report_api import seqr_stats, anvil_export, gregor_export, family_metadata, variant_metadata from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase, AirtableTest @@ 
-486,6 +486,31 @@ ] + INVALID_TABLES } +BASE_VARIANT_METADATA_ROW = { + 'MME': False, + 'additional_family_members_with_variant': '', + 'allele_balance_or_heteroplasmy_percentage': None, + 'analysisStatus': 'Q', + 'analysis_groups': '', + 'clinvar': None, + 'condition_id': None, + 'consanguinity': 'Unknown', + 'end': None, + 'hgvsc': '', + 'hgvsp': '', + 'method_of_discovery': 'SR-ES', + 'notes': None, + 'phenotype_contribution': 'Full', + 'phenotype_description': None, + 'pmid_id': None, + 'seqr_chosen_consequence': None, + 'solve_status': 'Unsolved', + 'svName': None, + 'svType': None, + 'sv_name': None, + 'transcript': None, +} + PARTICIPANT_TABLE = [ [ 'participant_id', 'internal_project_id', 'gregor_center', 'consent_code', 'recontactable', 'prior_testing', @@ -1074,9 +1099,238 @@ def _test_expected_gregor_airtable_calls(self, additional_samples=None, addition self.assertEqual(responses.calls[len(mondo_ids) + 3].request.url, MOCK_DATA_MODEL_URL) + def test_family_metadata(self): + url = reverse(family_metadata, args=['R0003_test']) + self.check_analyst_login(url) + + response = self.client.get(url) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertListEqual(list(response_json.keys()), ['rows']) + self.assertListEqual(sorted([r['familyGuid'] for r in response_json['rows']]), ['F000011_11', 'F000012_12']) + test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000012_12') + self.assertDictEqual(test_row, { + 'projectGuid': 'R0003_test', + 'internal_project_id': 'Test Reprocessed Project', + 'familyGuid': 'F000012_12', + 'family_id': '12', + 'displayName': '12', + 'solve_status': 'Unsolved', + 'actual_inheritance': 'unknown', + 'date_data_generation': '2017-02-05', + 'data_type': 'WES', + 'proband_id': 'NA20889', + 'maternal_id': '', + 'paternal_id': '', + 'other_individual_ids': 'NA20870; NA20888', + 'individual_count': 3, + 'family_structure': 'other', + 'family_history': 'Yes', + 'genes': 
'DEL:chr1:249045487-249045898; OR4G11P', + 'pmid_id': None, + 'phenotype_description': None, + 'analysisStatus': 'Q', + 'analysis_groups': '', + 'consanguinity': 'Unknown', + }) + + # Test all projects + all_projects_url = reverse(family_metadata, args=['all']) + response = self.client.get(all_projects_url) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertListEqual(list(response_json.keys()), ['rows']) + self.assertListEqual(sorted([r['familyGuid'] for r in response_json['rows']]), [ + 'F000001_1', 'F000002_2', 'F000003_3', 'F000004_4', 'F000005_5', 'F000006_6', 'F000007_7', 'F000008_8', + 'F000009_9', 'F000010_10', 'F000011_11', 'F000012_12', 'F000013_13'] + self.ADDITIONAL_FAMILIES) + test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3') + self.assertDictEqual(test_row, { + 'projectGuid': 'R0001_1kg', + 'internal_project_id': '1kg project nÃ¥me with uniçøde', + 'familyGuid': 'F000003_3', + 'family_id': '3', + 'displayName': '3', + 'solve_status': 'Unsolved', + 'actual_inheritance': '', + 'date_data_generation': '2017-02-05', + 'data_type': 'WES', + 'other_individual_ids': 'NA20870', + 'individual_count': 1, + 'family_structure': 'singleton', + 'genes': '', + 'pmid_id': None, + 'phenotype_description': None, + 'analysisStatus': 'Q', + 'analysis_groups': 'Accepted; Test Group 1', + 'consanguinity': 'Unknown', + 'condition_id': 'OMIM:615123', + 'known_condition_name': '', + 'condition_inheritance': 'Unknown', + }) + + # Test empty project + empty_project_url = reverse(family_metadata, args=['R0002_empty']) + response = self.client.get(empty_project_url) + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'rows': []}) + + def test_variant_metadata(self): + url = reverse(variant_metadata, args=[PROJECT_GUID]) + self.check_analyst_login(url) + + response = self.client.get(url) + self.assertEqual(response.status_code, 200) + response_json = response.json() + 
self.assertListEqual(list(response_json.keys()), ['rows']) + row_ids = ['NA19675_1_21_3343353', 'HG00731_1_248367227', 'HG00731_19_1912634', 'HG00731_19_1912633', 'HG00731_19_1912632'] + self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) + expected_row = { + **BASE_VARIANT_METADATA_ROW, + 'additional_family_members_with_variant': 'HG00732', + 'alt': 'T', + 'chrom': '1', + 'clinvar': {'alleleId': None, 'clinicalSignificance': '', 'goldStars': None, 'variationId': None}, + 'condition_id': 'MONDO:0044970', + 'condition_inheritance': None, + 'displayName': '2', + 'familyGuid': 'F000002_2', + 'family_id': '2', + 'gene': 'RP11', + 'gene_id': 'ENSG00000135953', + 'gene_known_for_phenotype': 'Known', + 'genetic_findings_id': 'HG00731_1_248367227', + 'known_condition_name': 'mitochondrial disease', + 'participant_id': 'HG00731', + 'phenotype_contribution': 'Full', + 'phenotype_description': 'microcephaly; seizures', + 'pos': 248367227, + 'projectGuid': 'R0001_1kg', + 'internal_project_id': '1kg project nÃ¥me with uniçøde', + 'ref': 'TC', + 'tags': ['Known gene for phenotype'], + 'variant_inheritance': 'paternal', + 'variant_reference_assembly': 'GRCh37', + 'zygosity': 'Homozygous', + } + self.assertDictEqual(response_json['rows'][1], expected_row) + expected_mnv = { + **BASE_VARIANT_METADATA_ROW, + 'alt': 'T', + 'chrom': '19', + 'condition_id': 'MONDO:0044970', + 'condition_inheritance': None, + 'displayName': '2', + 'end': 1912634, + 'familyGuid': 'F000002_2', + 'family_id': '2', + 'gene': 'OR4G11P', + 'gene_id': 'ENSG00000240361', + 'gene_known_for_phenotype': 'Known', + 'genetic_findings_id': 'HG00731_19_1912634', + 'known_condition_name': 'mitochondrial disease', + 'notes': 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', + 'participant_id': 'HG00731', + 'phenotype_description': 'microcephaly; seizures', + 'pos': 1912634, + 
'projectGuid': 'R0001_1kg', + 'internal_project_id': '1kg project nÃ¥me with uniçøde', + 'ref': 'C', + 'tags': ['Known gene for phenotype'], + 'transcript': 'ENST00000371839', + 'variant_inheritance': 'unknown', + 'variant_reference_assembly': 'GRCh38', + 'zygosity': 'Heterozygous', + } + self.assertDictEqual(response_json['rows'][2], expected_mnv) + + # Test gregor projects + gregor_projects_url = reverse(variant_metadata, args=['gregor']) + response = self.client.get(gregor_projects_url) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertListEqual(list(response_json.keys()), ['rows']) + row_ids += ['NA20889_1_248367227', 'NA20889_1_249045487'] + self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) + self.assertDictEqual(response_json['rows'][1], expected_row) + self.assertDictEqual(response_json['rows'][2], expected_mnv) + self.assertDictEqual(response_json['rows'][5], { + **BASE_VARIANT_METADATA_ROW, + 'MME': True, + 'alt': 'T', + 'chrom': '1', + 'clinvar': {'alleleId': None, 'clinicalSignificance': '', 'goldStars': None, 'variationId': None}, + 'condition_id': 'MONDO:0008788', + 'displayName': '12', + 'familyGuid': 'F000012_12', + 'family_id': '12', + 'family_history': 'Yes', + 'gene': 'OR4G11P', + 'gene_id': 'ENSG00000240361', + 'gene_known_for_phenotype': 'Candidate', + 'genetic_findings_id': 'NA20889_1_248367227', + 'hgvsc': 'c.3955G>A', + 'hgvsp': 'c.1586-17C>G', + 'participant_id': 'NA20889', + 'pos': 248367227, + 'projectGuid': 'R0003_test', + 'internal_project_id': 'Test Reprocessed Project', + 'ref': 'TC', + 'seqr_chosen_consequence': 'intron_variant', + 'tags': ['Tier 1 - Novel gene and phenotype'], + 'transcript': 'ENST00000505820', + 'variant_inheritance': 'unknown', + 'variant_reference_assembly': 'GRCh37', + 'zygosity': 'Heterozygous', + }) + self.assertDictEqual(response_json['rows'][6], { + **BASE_VARIANT_METADATA_ROW, + 'alt': None, + 'chrom': '1', + 'condition_id': 
'MONDO:0008788', + 'displayName': '12', + 'end': 249045898, + 'familyGuid': 'F000012_12', + 'family_id': '12', + 'family_history': 'Yes', + 'gene': None, + 'gene_id': None, + 'gene_known_for_phenotype': 'Candidate', + 'genetic_findings_id': 'NA20889_1_249045487', + 'participant_id': 'NA20889', + 'pos': 249045487, + 'projectGuid': 'R0003_test', + 'internal_project_id': 'Test Reprocessed Project', + 'ref': None, + 'svType': 'DEL', + 'sv_name': 'DEL:chr1:249045487-249045898', + 'tags': ['Tier 1 - Novel gene and phenotype'], + 'variant_inheritance': 'unknown', + 'variant_reference_assembly': 'GRCh37', + 'zygosity': 'Heterozygous', + }) + + # Test all projects + all_projects_url = reverse(variant_metadata, args=['all']) + response = self.client.get(all_projects_url) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertListEqual(list(response_json.keys()), ['rows']) + row_ids += self.ADDITIONAL_FINDINGS + self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) + self.assertDictEqual(response_json['rows'][1], expected_row) + self.assertDictEqual(response_json['rows'][2], expected_mnv) + + # Test empty project + empty_project_url = reverse(family_metadata, args=['R0002_empty']) + response = self.client.get(empty_project_url) + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'rows': []}) + class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] + ADDITIONAL_FAMILIES = ['F000014_14'] + ADDITIONAL_FINDINGS = ['NA21234_1_248367227'] STATS_DATA = { 'projectsCount': {'non_demo': 3, 'demo': 1}, 'familiesCount': {'non_demo': 12, 'demo': 2}, @@ -1093,6 +1347,8 @@ class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): class AnvilReportAPITest(AnvilAuthenticationTestCase, ReportAPITest): fixtures = ['users', 'social_auth', '1kg_project', 'reference_data', 'report_variants'] + 
ADDITIONAL_FAMILIES = [] + ADDITIONAL_FINDINGS = [] STATS_DATA = { 'projectsCount': {'internal': 1, 'external': 1, 'no_anvil': 1, 'demo': 1}, 'familiesCount': {'internal': 11, 'external': 1, 'no_anvil': 0, 'demo': 2}, diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 4af43c1bee..8fb27bf768 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -2,8 +2,7 @@ from datetime import datetime from django.core.exceptions import PermissionDenied from django.contrib.auth.models import User -from django.contrib.postgres.aggregates import ArrayAgg -from django.db.models import CharField, F, Q, Value +from django.db.models import CharField, F, Value from django.db.models.functions import Coalesce, Concat, JSONObject, NullIf import json from random import randint @@ -24,7 +23,7 @@ add_individual_hpo_details, INDIVIDUAL_DISPLAY_NAME_EXPR, AIP_TAG_TYPE from seqr.views.utils.permissions_utils import analyst_required, user_is_analyst, get_project_guids_user_can_view, \ login_and_policies_required, get_project_and_check_permissions, get_internal_projects -from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE +from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, DISCOVERY_ROW_TYPE from seqr.views.utils.variant_utils import get_variants_response, bulk_create_tagged_variants, DISCOVERY_CATEGORY from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL @@ -349,122 +348,3 @@ def _get_airtable_collaborator_names(user, collaborator_ids): collaborator_id: collaborator_map.get(collaborator_id, {}).get('CollaboratorID') for collaborator_id in collaborator_ids } - - -@login_and_policies_required -def family_metadata(request, project_guid): - projects, _ = _get_metadata_projects(request, project_guid) - - families_by_id = {} - family_individuals = defaultdict(dict) - 
- def _add_row(row, family_id, row_type): - if row_type == FAMILY_ROW_TYPE: - families_by_id[family_id] = row - elif row_type == SUBJECT_ROW_TYPE: - family_individuals[family_id][row['participant_id']] = row - elif row_type == SAMPLE_ROW_TYPE: - family_individuals[family_id][row['participant_id']].update(row) - elif row_type == DISCOVERY_ROW_TYPE: - family = families_by_id[family_id] - if 'inheritance_models' not in family: - family.update({'genes': set(), 'inheritance_models': set()}) - family['genes'].update({v.get('gene') or v.get('sv_name') or v.get('gene_id') or '' for v in row}) - family['inheritance_models'].update({v['variant_inheritance'] for v in row}) - - parse_anvil_metadata( - projects, user=request.user, add_row=_add_row, omit_airtable=True, include_metadata=True, include_no_individual_families=True) - - for family_id, f in families_by_id.items(): - individuals_by_id = family_individuals[family_id] - proband = next((i for i in individuals_by_id.values() if i['proband_relationship'] == 'Self'), None) - individuals_ids = set(individuals_by_id.keys()) - known_ids = {} - if proband: - known_ids = { - 'proband_id': proband['participant_id'], - 'paternal_id': proband['paternal_id'], - 'maternal_id': proband['maternal_id'], - } - f.update(known_ids) - individuals_ids -= set(known_ids.values()) - - sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) - earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) - - inheritance_models = f.pop('inheritance_models', []) - f.update({ - 'individual_count': len(individuals_by_id), - 'other_individual_ids': '; '.join(sorted(individuals_ids)), - 'family_structure': _get_family_structure(len(individuals_by_id), sum(1 for id in known_ids.values() if id)), - 'data_type': earliest_sample.get('data_type'), - 'date_data_generation': earliest_sample.get('date_data_generation'), - 'genes': '; '.join(sorted(f.get('genes', []))), - 
'actual_inheritance': 'unknown' if inheritance_models == {'unknown'} else ';'.join( - sorted([i for i in inheritance_models if i != 'unknown'])), - }) - - return create_json_response({'rows': list(families_by_id.values())}) - - -FAMILY_STRUCTURES = { - 1: 'singleton', - 2: 'duo', - 3: 'trio', - 4: 'quad', -} - - -def _get_family_structure(num_individuals, num_known_individuals): - if (num_individuals and num_known_individuals == num_individuals) or ( - num_known_individuals in {0, 3} and num_individuals == num_known_individuals + 1): - return FAMILY_STRUCTURES[num_individuals] - return 'other' - - -@login_and_policies_required -def variant_metadata(request, project_guid): - projects, _ = _get_metadata_projects(request, project_guid) - - individuals = Individual.objects.filter( - family__project__in=projects, family__savedvariant__varianttag__variant_tag_type__category=DISCOVERY_CATEGORY, - ).distinct().annotate( - data_types=ArrayAgg('sample__sample_type', distinct=True, filter=Q(sample__isnull=False)) - ) - - families_by_id = {} - participant_mme = {} - variant_rows = [] - - def _add_row(row, family_id, row_type): - if row_type == FAMILY_ROW_TYPE: - families_by_id[family_id] = row - elif row_type == SUBJECT_ROW_TYPE: - participant_mme[row['participant_id']] = row.get('MME', {}) - elif row_type == DISCOVERY_ROW_TYPE: - family = families_by_id[family_id] - for variant in row: - del variant['gene_ids'] - variant_rows.append({ - 'MME': variant.pop('variantId') in participant_mme[variant['participant_id']].get('variant_ids', []), - 'phenotype_contribution': 'Full', - **family, - **variant, - }) - - parse_anvil_metadata( - projects, - user=request.user, - individual_samples={i: None for i in individuals}, - individual_data_types={i.individual_id: i.data_types for i in individuals}, - add_row=_add_row, - variant_json_fields=['clinvar', 'variantId'], - mme_values={'variant_ids': ArrayAgg('matchmakersubmissiongenes__saved_variant__saved_variant_json__variantId')}, - 
include_metadata=True, - include_mondo=True, - omit_airtable=True, - proband_only_variants=True, - include_parent_mnvs=True, - ) - - return create_json_response({'rows': variant_rows}) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 453299a757..6a4a040932 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -6,7 +6,7 @@ import responses from seqr.views.apis.summary_data_api import mme_details, success_story, saved_variants_page, hpo_summary_data, \ - bulk_update_family_external_analysis, individual_metadata, family_metadata, variant_metadata + bulk_update_family_external_analysis, individual_metadata from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase, AirtableTest, PARSED_VARIANTS from seqr.models import FamilyAnalysedBy, SavedVariant, VariantTag from settings import AIRTABLE_URL @@ -250,32 +250,6 @@ } -BASE_VARIANT_METADATA_ROW = { - 'MME': False, - 'additional_family_members_with_variant': '', - 'allele_balance_or_heteroplasmy_percentage': None, - 'analysisStatus': 'Q', - 'analysis_groups': '', - 'clinvar': None, - 'condition_id': None, - 'consanguinity': 'Unknown', - 'end': None, - 'hgvsc': '', - 'hgvsp': '', - 'method_of_discovery': 'SR-ES', - 'notes': None, - 'phenotype_contribution': 'Full', - 'phenotype_description': None, - 'pmid_id': None, - 'seqr_chosen_consequence': None, - 'solve_status': 'Unsolved', - 'svName': None, - 'svType': None, - 'sv_name': None, - 'transcript': None, -} - - @mock.patch('seqr.views.utils.permissions_utils.safe_redis_get_json', lambda *args: None) class SummaryDataAPITest(AirtableTest): @@ -374,10 +348,6 @@ def test_saved_variants_page(self): all_tag_url = reverse(saved_variants_page, args=['ALL']) response = self.client.get('{}?gene=ENSG00000135953'.format(all_tag_url)) self.assertEqual(response.status_code, 200) - report_variant_guids = { - 'SV0027168_191912632_r0384_rare', 
'SV0027167_191912633_r0384_rare', 'SV0027166_191912634_r0384_rare', - } - expected_variant_guids.update(report_variant_guids) expected_variant_guids.add('SV0000002_1248367227_r0390_100') self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), expected_variant_guids) @@ -396,7 +366,7 @@ def test_saved_variants_page(self): self.assertEqual(response.status_code, 200) self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), { 'SV0000001_2103343353_r0390_100', 'SV0000002_1248367227_r0390_100', 'SV0000007_prefix_19107_DEL_r00', - 'SV0000006_1248367227_r0003_tes', *report_variant_guids, + 'SV0000006_1248367227_r0003_tes', }) multi_discovery_tag_url = reverse(saved_variants_page, args=['CMG Discovery Tags;Review']) @@ -731,253 +701,12 @@ def test_sample_metadata_export(self, mock_google_authenticated): response = self.client.get(f'{gregor_projects_url}?includeAirtable=true') self._has_expected_metadata_response(response, multi_project_individuals, has_airtable=True, has_duplicate=True) - def test_family_metadata(self): - url = reverse(family_metadata, args=['R0003_test']) - self.check_collaborator_login(url) - - response = self.client.get(url) - self.assertEqual(response.status_code, 200) - response_json = response.json() - self.assertListEqual(list(response_json.keys()), ['rows']) - self.assertListEqual(sorted([r['familyGuid'] for r in response_json['rows']]), ['F000011_11', 'F000012_12']) - test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000012_12') - self.assertDictEqual(test_row, { - 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', - 'familyGuid': 'F000012_12', - 'family_id': '12', - 'displayName': '12', - 'solve_status': 'Unsolved', - 'actual_inheritance': 'unknown', - 'date_data_generation': '2017-02-05', - 'data_type': 'WES', - 'proband_id': 'NA20889', - 'maternal_id': '', - 'paternal_id': '', - 'other_individual_ids': 'NA20870; NA20888', - 'individual_count': 3, - 
'family_structure': 'other', - 'family_history': 'Yes', - 'genes': 'DEL:chr1:249045487-249045898; OR4G11P', - 'pmid_id': None, - 'phenotype_description': None, - 'analysisStatus': 'Q', - 'analysis_groups': '', - 'consanguinity': 'Unknown', - }) - - # Test all projects - all_projects_url = reverse(family_metadata, args=['all']) - response = self.client.get(all_projects_url) - self.assertEqual(response.status_code, 200) - response_json = response.json() - self.assertListEqual(list(response_json.keys()), ['rows']) - all_project_families = [ - 'F000001_1', 'F000002_2', 'F000003_3', 'F000004_4', 'F000005_5', 'F000006_6', 'F000007_7', 'F000008_8', - 'F000009_9', 'F000010_10', 'F000011_11', 'F000012_12', 'F000013_13'] - self.assertListEqual(sorted([r['familyGuid'] for r in response_json['rows']]), all_project_families) - test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3') - self.assertDictEqual(test_row, { - 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nÃ¥me with uniçøde', - 'familyGuid': 'F000003_3', - 'family_id': '3', - 'displayName': '3', - 'solve_status': 'Unsolved', - 'actual_inheritance': '', - 'date_data_generation': '2017-02-05', - 'data_type': 'WES', - 'other_individual_ids': 'NA20870', - 'individual_count': 1, - 'family_structure': 'singleton', - 'genes': '', - 'pmid_id': None, - 'phenotype_description': None, - 'analysisStatus': 'Q', - 'analysis_groups': 'Accepted; Test Group 1', - 'consanguinity': 'Unknown', - 'condition_id': 'OMIM:615123', - 'known_condition_name': '', - 'condition_inheritance': 'Unknown', - }) - - # Test analyst access - self.login_analyst_user() - response = self.client.get(all_projects_url) - self.assertEqual(response.status_code, 200) - self.assertListEqual( - sorted([r['familyGuid'] for r in response.json()['rows']]), all_project_families + self.ADDITIONAL_FAMILIES) - - # Test empty project - empty_project_url = reverse(family_metadata, args=['R0002_empty']) - response = 
self.client.get(empty_project_url) - self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'rows': []}) - - def test_variant_metadata(self): - url = reverse(variant_metadata, args=[PROJECT_GUID]) - self.check_collaborator_login(url) - - response = self.client.get(url) - self.assertEqual(response.status_code, 200) - response_json = response.json() - self.assertListEqual(list(response_json.keys()), ['rows']) - row_ids = ['NA19675_1_21_3343353', 'HG00731_1_248367227', 'HG00731_19_1912634', 'HG00731_19_1912633', 'HG00731_19_1912632'] - self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) - expected_row = { - **BASE_VARIANT_METADATA_ROW, - 'additional_family_members_with_variant': 'HG00732', - 'alt': 'T', - 'chrom': '1', - 'clinvar': {'alleleId': None, 'clinicalSignificance': '', 'goldStars': None, 'variationId': None}, - 'condition_id': 'MONDO:0044970', - 'condition_inheritance': None, - 'displayName': '2', - 'familyGuid': 'F000002_2', - 'family_id': '2', - 'gene': 'RP11', - 'gene_id': 'ENSG00000135953', - 'gene_known_for_phenotype': 'Known', - 'genetic_findings_id': 'HG00731_1_248367227', - 'known_condition_name': 'mitochondrial disease', - 'participant_id': 'HG00731', - 'phenotype_contribution': 'Full', - 'phenotype_description': 'microcephaly; seizures', - 'pos': 248367227, - 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nÃ¥me with uniçøde', - 'ref': 'TC', - 'tags': ['Known gene for phenotype'], - 'variant_inheritance': 'paternal', - 'variant_reference_assembly': 'GRCh37', - 'zygosity': 'Homozygous', - } - self.assertDictEqual(response_json['rows'][1], expected_row) - expected_mnv = { - **BASE_VARIANT_METADATA_ROW, - 'alt': 'T', - 'chrom': '19', - 'condition_id': 'MONDO:0044970', - 'condition_inheritance': None, - 'displayName': '2', - 'end': 1912634, - 'familyGuid': 'F000002_2', - 'family_id': '2', - 'gene': 'OR4G11P', - 'gene_id': 'ENSG00000240361', - 'gene_known_for_phenotype': 
'Known', - 'genetic_findings_id': 'HG00731_19_1912634', - 'known_condition_name': 'mitochondrial disease', - 'notes': 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', - 'participant_id': 'HG00731', - 'phenotype_description': 'microcephaly; seizures', - 'pos': 1912634, - 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nÃ¥me with uniçøde', - 'ref': 'C', - 'tags': ['Known gene for phenotype'], - 'transcript': 'ENST00000371839', - 'variant_inheritance': 'unknown', - 'variant_reference_assembly': 'GRCh38', - 'zygosity': 'Heterozygous', - } - self.assertDictEqual(response_json['rows'][2], expected_mnv) - - # Test gregor projects - gregor_projects_url = reverse(variant_metadata, args=['gregor']) - response = self.client.get(gregor_projects_url) - self.assertEqual(response.status_code, 403) - - self.login_analyst_user() - response = self.client.get(gregor_projects_url) - self.assertEqual(response.status_code, 200) - response_json = response.json() - self.assertListEqual(list(response_json.keys()), ['rows']) - row_ids += ['NA20889_1_248367227', 'NA20889_1_249045487'] - self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) - self.assertDictEqual(response_json['rows'][1], expected_row) - self.assertDictEqual(response_json['rows'][2], expected_mnv) - self.assertDictEqual(response_json['rows'][5], { - **BASE_VARIANT_METADATA_ROW, - 'MME': True, - 'alt': 'T', - 'chrom': '1', - 'clinvar': {'alleleId': None, 'clinicalSignificance': '', 'goldStars': None, 'variationId': None}, - 'condition_id': 'MONDO:0008788', - 'displayName': '12', - 'familyGuid': 'F000012_12', - 'family_id': '12', - 'family_history': 'Yes', - 'gene': 'OR4G11P', - 'gene_id': 'ENSG00000240361', - 'gene_known_for_phenotype': 'Candidate', - 'genetic_findings_id': 'NA20889_1_248367227', - 'hgvsc': 'c.3955G>A', - 'hgvsp': 'c.1586-17C>G', - 'participant_id': 'NA20889', 
- 'pos': 248367227, - 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', - 'ref': 'TC', - 'seqr_chosen_consequence': 'intron_variant', - 'tags': ['Tier 1 - Novel gene and phenotype'], - 'transcript': 'ENST00000505820', - 'variant_inheritance': 'unknown', - 'variant_reference_assembly': 'GRCh37', - 'zygosity': 'Heterozygous', - }) - self.assertDictEqual(response_json['rows'][6], { - **BASE_VARIANT_METADATA_ROW, - 'alt': None, - 'chrom': '1', - 'condition_id': 'MONDO:0008788', - 'displayName': '12', - 'end': 249045898, - 'familyGuid': 'F000012_12', - 'family_id': '12', - 'family_history': 'Yes', - 'gene': None, - 'gene_id': None, - 'gene_known_for_phenotype': 'Candidate', - 'genetic_findings_id': 'NA20889_1_249045487', - 'participant_id': 'NA20889', - 'pos': 249045487, - 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', - 'ref': None, - 'svType': 'DEL', - 'sv_name': 'DEL:chr1:249045487-249045898', - 'tags': ['Tier 1 - Novel gene and phenotype'], - 'variant_inheritance': 'unknown', - 'variant_reference_assembly': 'GRCh37', - 'zygosity': 'Heterozygous', - }) - - # Test all projects - all_projects_url = reverse(variant_metadata, args=['all']) - response = self.client.get(all_projects_url) - self.assertEqual(response.status_code, 200) - response_json = response.json() - self.assertListEqual(list(response_json.keys()), ['rows']) - row_ids += self.ADDITIONAL_FINDINGS - self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) - self.assertDictEqual(response_json['rows'][1], expected_row) - self.assertDictEqual(response_json['rows'][2], expected_mnv) - - # Test empty project - empty_project_url = reverse(family_metadata, args=['R0002_empty']) - response = self.client.get(empty_project_url) - self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'rows': []}) - # Tests for AnVIL access disabled class LocalSummaryDataAPITest(AuthenticationTestCase, 
SummaryDataAPITest): - fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] + fixtures = ['users', '1kg_project', 'reference_data'] NUM_MANAGER_SUBMISSIONS = 4 ADDITIONAL_SAMPLES = ['NA21234', 'NA21987'] - ADDITIONAL_FAMILIES = ['F000014_14'] - ADDITIONAL_FINDINGS = ['NA21234_1_248367227'] def assert_has_expected_calls(self, users, skip_group_call_idxs=None): @@ -991,11 +720,9 @@ def assert_has_expected_calls(self, users, skip_group_call_idxs=None): # Test for permissions from AnVIL only class AnvilSummaryDataAPITest(AnvilAuthenticationTestCase, SummaryDataAPITest): - fixtures = ['users', 'social_auth', '1kg_project', 'reference_data', 'report_variants'] + fixtures = ['users', 'social_auth', '1kg_project', 'reference_data'] NUM_MANAGER_SUBMISSIONS = 4 ADDITIONAL_SAMPLES = [] - ADDITIONAL_FAMILIES = [] - ADDITIONAL_FINDINGS = [] def test_mme_details(self, *args): super(AnvilSummaryDataAPITest, self).test_mme_details(*args) diff --git a/ui/pages/Report/Report.jsx b/ui/pages/Report/Report.jsx index a7a4a1ded2..d4e8f61533 100644 --- a/ui/pages/Report/Report.jsx +++ b/ui/pages/Report/Report.jsx @@ -8,12 +8,16 @@ import { Error404, Error401 } from 'shared/components/page/Errors' import Anvil from './components/Anvil' import CustomSearch from './components/CustomSearch' +import FamilyMetadata from './components/FamilyMetadata' import Gregor from './components/Gregor' import SeqrStats from './components/SeqrStats' +import VariantMetadata from './components/VariantMetadata' export const REPORT_PAGES = [ { path: 'anvil', component: Anvil }, { path: 'custom_search', params: '/:searchHash?', component: CustomSearch }, + { path: 'family_metadata', params: '/:projectGuid?', component: FamilyMetadata }, + { path: 'variant_metadata', params: '/:projectGuid?', component: VariantMetadata }, { path: 'gregor', component: Gregor }, { path: 'seqr_stats', component: SeqrStats }, ] diff --git a/ui/pages/SummaryData/components/FamilyMetadata.jsx 
b/ui/pages/Report/components/FamilyMetadata.jsx similarity index 80% rename from ui/pages/SummaryData/components/FamilyMetadata.jsx rename to ui/pages/Report/components/FamilyMetadata.jsx index 89f67fa2fa..412dc55848 100644 --- a/ui/pages/SummaryData/components/FamilyMetadata.jsx +++ b/ui/pages/Report/components/FamilyMetadata.jsx @@ -1,7 +1,9 @@ import React from 'react' +import LoadReportTable from 'shared/components/table/LoadReportTable' import { FAMILY_ANALYSIS_STATUS_LOOKUP } from 'shared/utils/constants' -import LoadReportTable from './LoadReportTable' + +const VIEW_ALL_PAGES = [{ name: 'Broad', downloadName: 'All', path: 'all' }] const COLUMNS = [ { name: 'data_type' }, @@ -31,8 +33,10 @@ const COLUMNS = [ const FamilyMetadata = props => ( ) diff --git a/ui/pages/SummaryData/components/VariantMetadata.jsx b/ui/pages/Report/components/VariantMetadata.jsx similarity index 77% rename from ui/pages/SummaryData/components/VariantMetadata.jsx rename to ui/pages/Report/components/VariantMetadata.jsx index 6a46031fba..b09db6a2ab 100644 --- a/ui/pages/SummaryData/components/VariantMetadata.jsx +++ b/ui/pages/Report/components/VariantMetadata.jsx @@ -1,7 +1,12 @@ import React from 'react' +import LoadReportTable from 'shared/components/table/LoadReportTable' import { clinvarSignificance, VARIANT_METADATA_COLUMNS } from 'shared/utils/constants' -import LoadReportTable from './LoadReportTable' + +const VIEW_ALL_PAGES = [ + { name: 'GREGoR', downloadName: 'GREGoR', path: 'gregor' }, + { name: 'Broad', downloadName: 'All', path: 'all' }, +] const COLUMNS = [ { name: 'participant_id' }, @@ -24,8 +29,10 @@ const COLUMNS = [ const FamilyMetadata = props => ( ) diff --git a/ui/pages/SummaryData/SummaryData.jsx b/ui/pages/SummaryData/SummaryData.jsx index b2280d6f22..b56789bcfb 100644 --- a/ui/pages/SummaryData/SummaryData.jsx +++ b/ui/pages/SummaryData/SummaryData.jsx @@ -15,8 +15,6 @@ import GeneInfoSearch from './components/GeneInfoSearch' import LocusLists from 
'./components/LocusLists' import ExternalAnalysis from './components/ExternalAnalysis' import Hpo from './components/Hpo' -import FamilyMetadata from './components/FamilyMetadata' -import VariantMetadata from './components/VariantMetadata' import IndividualMetadata from './components/IndividualMetadata' import VariantLookup from './components/VariantLookup' @@ -33,8 +31,6 @@ const SUMMARY_DATA_PAGES = [ { path: 'gene_lists', component: LocusLists }, { path: 'saved_variants', component: SavedVariants }, { path: 'individual_metadata', params: '/:projectGuid?', component: IndividualMetadata }, - { path: 'family_metadata', params: '/:projectGuid?', component: FamilyMetadata }, - { path: 'variant_metadata', params: '/:projectGuid?', component: VariantMetadata }, { path: 'hpo_terms', component: Hpo }, { path: 'matchmaker', component: Matchmaker }, ] diff --git a/ui/pages/SummaryData/components/IndividualMetadata.jsx b/ui/pages/SummaryData/components/IndividualMetadata.jsx index 5b216cb3b0..c55db3aac3 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.jsx +++ b/ui/pages/SummaryData/components/IndividualMetadata.jsx @@ -1,8 +1,12 @@ -import React from 'react' +import { connect } from 'react-redux' +import { getUser } from 'redux/selectors' import { BaseSemanticInput, BooleanCheckbox } from 'shared/components/form/Inputs' +import LoadReportTable from 'shared/components/table/LoadReportTable' import { FAMILY_ANALYSIS_STATUS_LOOKUP, VARIANT_METADATA_COLUMNS } from 'shared/utils/constants' -import LoadReportTable from './LoadReportTable' + +const ALL_PROJECTS_PATH = 'all' +const GREGOR_PROJECT_PATH = 'gregor' const FIELDS = [ { @@ -63,6 +67,12 @@ const AIRTABLE_COLUMNS = [ { name: 'sample_provider' }, ] +const ANALYST_VIEW_ALL_PAGES = [ + { name: 'GREGoR', downloadName: 'All_GREGoR_Projects', path: GREGOR_PROJECT_PATH }, + { name: 'Broad', downloadName: 'All_AnVIL_Projects', path: ALL_PROJECTS_PATH }, +] +const VIEW_ALL_PAGES = [{ name: 'my', downloadName: 
'All_Projects', path: ALL_PROJECTS_PATH }] + const getColumns = (data) => { const maxSavedVariants = Math.max(1, ...(data || []).map(row => row.num_saved_variants)) const hasAirtable = data && data[0] && data[0][AIRTABLE_DBGAP_SUBMISSION_FIELD] @@ -78,15 +88,16 @@ const getColumns = (data) => { ) } -const IndividualMetadata = props => ( - -) +const mapStateToProps = (state, ownProps) => { + const user = getUser(state) + return { + getColumns, + queryFields: (user.isAnalyst && ownProps.match.params.projectGuid !== ALL_PROJECTS_PATH) ? AIRTABLE_FIELDS : FIELDS, + viewAllPages: (user.isAnalyst ? ANALYST_VIEW_ALL_PAGES : VIEW_ALL_PAGES), + urlBase: 'summary_data/individual_metadata', + idField: 'participant_id', + fileName: 'Metadata', + } +} -export default IndividualMetadata +export default connect(mapStateToProps)(LoadReportTable) diff --git a/ui/pages/SummaryData/components/LoadReportTable.jsx b/ui/shared/components/table/LoadReportTable.jsx similarity index 58% rename from ui/pages/SummaryData/components/LoadReportTable.jsx rename to ui/shared/components/table/LoadReportTable.jsx index fe6766399d..4a9ab52d41 100644 --- a/ui/pages/SummaryData/components/LoadReportTable.jsx +++ b/ui/shared/components/table/LoadReportTable.jsx @@ -1,9 +1,7 @@ import React from 'react' -import { connect } from 'react-redux' import PropTypes from 'prop-types' import { Link } from 'react-router-dom' -import { getUser } from 'redux/selectors' import { NoHoverFamilyLink } from 'shared/components/buttons/FamilyLink' import AwesomeBar from 'shared/components/page/AwesomeBar' import DataTable from 'shared/components/table/DataTable' @@ -11,17 +9,9 @@ import { HorizontalSpacer } from 'shared/components/Spacers' import StateDataLoader from 'shared/components/StateDataLoader' import { InlineHeader, ActiveDisabledNavLink } from 'shared/components/StyledComponents' -const ALL_PAGE = { downloadName: 'all_projects', path: 'all' } -const ANALYST_VIEW_ALL_PAGES = [ - { name: 'GREGoR', downloadName: 
'all_GREGoR_projects', path: 'gregor' }, - { name: 'Broad', ...ALL_PAGE }, -] -const VIEW_ALL_PAGES = [{ name: 'my', ...ALL_PAGE }] - const SEARCH_CATEGORIES = ['projects'] -const URL_BASE = 'summary_data' -const getResultHref = urlPath => result => `/${URL_BASE}/${urlPath}/${result.key}` +const getResultHref = urlBase => result => `/${urlBase}/${result.key}` const PROJECT_ID_FIELD = 'internal_project_id' @@ -42,7 +32,7 @@ const getTableColumns = columns => ([ ].map(({ name, ...props }) => ({ name, content: name, ...props }))) const ReportTable = React.memo(( - { projectGuid, queryForm, data, urlPath, user, columns, getColumns, idField }, + { projectGuid, queryForm, data, urlBase, viewAllPages, columns, getColumns, idField, fileName }, ) => (
@@ -50,12 +40,12 @@ const ReportTable = React.memo(( categories={SEARCH_CATEGORIES} placeholder="Enter project name" inputwidth="350px" - getResultHref={getResultHref(urlPath)} + getResultHref={getResultHref(urlBase)} /> - {(user.isAnalyst ? ANALYST_VIEW_ALL_PAGES : VIEW_ALL_PAGES).map(({ name, path }) => ( + {viewAllPages.map(({ name, path }) => (   or   - {`view all ${name} projects`} + {`view all ${name} projects`} ))} @@ -64,7 +54,7 @@ const ReportTable = React.memo(( striped collapsing horizontalScroll - downloadFileName={`${ANALYST_VIEW_ALL_PAGES.find(({ path }) => path === projectGuid)?.downloadName || (data?.length && data[0][PROJECT_ID_FIELD].replace(/ /g, '_'))}_${new Date().toISOString().slice(0, 10)}_${urlPath.split('_')[0]}_metadata`} + downloadFileName={`${viewAllPages.find(({ path }) => path === projectGuid)?.downloadName || (data?.length && data[0][PROJECT_ID_FIELD].replace(/ /g, '_'))}_${new Date().toISOString().slice(0, 10)}_${fileName}`} idField={idField} defaultSortColumn="family_id" emptyContent={projectGuid ? '0 cases found' : 'Select a project to view data'} @@ -78,20 +68,21 @@ const ReportTable = React.memo(( ReportTable.propTypes = { data: PropTypes.arrayOf(PropTypes.object), projectGuid: PropTypes.string, - user: PropTypes.object, + viewAllPages: PropTypes.arrayOf(PropTypes.object), queryForm: PropTypes.node, columns: PropTypes.arrayOf(PropTypes.object), getColumns: PropTypes.func, - urlPath: PropTypes.string, + urlBase: PropTypes.string, idField: PropTypes.string, + fileName: PropTypes.string, } const parseResponse = ({ rows }) => ({ data: rows }) -const LoadReportTable = ({ match, urlPath, ...props }) => ( +const LoadReportTable = ({ match, urlBase, ...props }) => ( ( LoadReportTable.propTypes = { match: PropTypes.object, - urlPath: PropTypes.string, -} - -const mapStateToProps = (state, ownProps) => { - const user = getUser(state) - return { - user, - queryFields: (user.isAnalyst && ownProps.match.params.projectGuid !== ALL_PAGE.path) ? 
- ownProps.allQueryFields : ownProps.queryFields, - } + urlBase: PropTypes.string, } -export default connect(mapStateToProps)(LoadReportTable) +export default LoadReportTable From f4516837f29a5b63583e47ec4e3a31c94cd494c1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 15 May 2024 12:07:26 -0400 Subject: [PATCH 122/736] alow local superusers to access some report pages --- CHANGELOG.md | 1 + ui/pages/Report/Report.jsx | 14 +++++++++----- ui/shared/components/page/Header.jsx | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a07e577172..ed1b4522e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## dev * Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION) +* Enable "Reports" tab by default for local installations ## 5/8/24 * Adds dynamic analysis groups (REQUIRES DB MIGRATION) diff --git a/ui/pages/Report/Report.jsx b/ui/pages/Report/Report.jsx index d4e8f61533..22831713ac 100644 --- a/ui/pages/Report/Report.jsx +++ b/ui/pages/Report/Report.jsx @@ -13,19 +13,23 @@ import Gregor from './components/Gregor' import SeqrStats from './components/SeqrStats' import VariantMetadata from './components/VariantMetadata' -export const REPORT_PAGES = [ - { path: 'anvil', component: Anvil }, +const LOCAL_REPORT_PAGES = [ { path: 'custom_search', params: '/:searchHash?', component: CustomSearch }, { path: 'family_metadata', params: '/:projectGuid?', component: FamilyMetadata }, { path: 'variant_metadata', params: '/:projectGuid?', component: VariantMetadata }, - { path: 'gregor', component: Gregor }, { path: 'seqr_stats', component: SeqrStats }, ] +export const REPORT_PAGES = [ + { path: 'anvil', component: Anvil }, + { path: 'gregor', component: Gregor }, + ...LOCAL_REPORT_PAGES, +] + const Report = ({ match, user }) => ( - user.isAnalyst ? ( + (user.isAnalyst || user.isPm) ? ( - {REPORT_PAGES.map( + {(user.isAnalyst ? 
REPORT_PAGES : LOCAL_REPORT_PAGES).map( ({ path, params, component }) => , )} diff --git a/ui/shared/components/page/Header.jsx b/ui/shared/components/page/Header.jsx index c523e894cc..3f461f2878 100644 --- a/ui/shared/components/page/Header.jsx +++ b/ui/shared/components/page/Header.jsx @@ -23,7 +23,7 @@ const PageHeader = React.memo(({ user, googleLoginEnabled, onSubmit }) => (
seqr
{Object.keys(user).length ? [ , - user.isAnalyst ? : null, + (user.isAnalyst || user.isPm) ? : null, (user.isDataManager || user.isPm) ? : null, , ] : null } From 3fb696b8b9aa08f1880963fde99677270e81d980 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 15 May 2024 12:13:59 -0400 Subject: [PATCH 123/736] update permissions for pm report access --- seqr/views/apis/report_api.py | 10 +++++----- seqr/views/utils/permissions_utils.py | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 8b05c31863..b308c6434a 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -18,7 +18,7 @@ from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.permissions_utils import analyst_required, get_project_and_check_permissions, \ - get_project_guids_user_can_view, get_internal_projects + get_project_guids_user_can_view, get_internal_projects, pm_or_analyst_required from seqr.views.utils.terra_api_utils import anvil_enabled from seqr.views.utils.variant_utils import DISCOVERY_CATEGORY @@ -31,7 +31,7 @@ MONDO_BASE_URL = 'https://monarchinitiative.org/v3/api/entity' -@analyst_required +@pm_or_analyst_required def seqr_stats(request): non_demo_projects = Project.objects.filter(is_demo=False) @@ -793,7 +793,7 @@ def _get_row_id(row): return row[id_col] -@analyst_required +@pm_or_analyst_required def family_metadata(request, project_guid): projects = _get_metadata_projects(project_guid, request.user) @@ -851,7 +851,7 @@ def _add_row(row, family_id, row_type): def _get_metadata_projects(project_guid, user): if project_guid == 'all': - return get_internal_projects() + return get_internal_projects().filter(guid__in=get_project_guids_user_can_view(user)) if project_guid == GREGOR_CATEGORY.lower(): return Project.objects.filter(projectcategory__name=GREGOR_CATEGORY) 
return [get_project_and_check_permissions(project_guid, user)] @@ -872,7 +872,7 @@ def _get_family_structure(num_individuals, num_known_individuals): return 'other' -@analyst_required +@pm_or_analyst_required def variant_metadata(request, project_guid): projects = _get_metadata_projects(project_guid, request.user) diff --git a/seqr/views/utils/permissions_utils.py b/seqr/views/utils/permissions_utils.py index 227a53e836..035df27b13 100644 --- a/seqr/views/utils/permissions_utils.py +++ b/seqr/views/utils/permissions_utils.py @@ -96,6 +96,8 @@ def decorator(view_func): pm_required = active_user_has_policies_and_passes_test(user_is_pm) pm_or_data_manager_required = active_user_has_policies_and_passes_test( lambda user: user_is_data_manager(user) or user_is_pm(user)) +pm_or_analyst_required = active_user_has_policies_and_passes_test( + lambda user: user_is_analyst(user) or user_is_pm(user)) superuser_required = active_user_has_policies_and_passes_test(lambda user: user.is_superuser) From 327a9e93a57312a0029df45a84d7bb35badcd344 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 15 May 2024 12:33:23 -0400 Subject: [PATCH 124/736] update unit tests --- seqr/views/apis/report_api_tests.py | 18 ++++++++++++++---- seqr/views/utils/test_utils.py | 7 ++++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index b840029e1e..0865d26748 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -648,7 +648,7 @@ def test_seqr_stats(self): self.assertDictEqual(response_json['familiesCount'], self.STATS_DATA['familiesCount']) self.assertDictEqual(response_json['sampleCountsByType'], self.STATS_DATA['sampleCountsByType']) - self.check_no_analyst_no_access(url) + self.check_no_analyst_no_access(url, has_override=True) @mock.patch('seqr.views.utils.export_utils.zipfile.ZipFile') @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated') @@ -1140,9 
+1140,10 @@ def test_family_metadata(self): self.assertEqual(response.status_code, 200) response_json = response.json() self.assertListEqual(list(response_json.keys()), ['rows']) - self.assertListEqual(sorted([r['familyGuid'] for r in response_json['rows']]), [ + expected_families = [ 'F000001_1', 'F000002_2', 'F000003_3', 'F000004_4', 'F000005_5', 'F000006_6', 'F000007_7', 'F000008_8', - 'F000009_9', 'F000010_10', 'F000011_11', 'F000012_12', 'F000013_13'] + self.ADDITIONAL_FAMILIES) + 'F000009_9', 'F000010_10', 'F000011_11', 'F000012_12', 'F000013_13'] + self.assertListEqual(sorted([r['familyGuid'] for r in response_json['rows']]), expected_families) test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3') self.assertDictEqual(test_row, { 'projectGuid': 'R0001_1kg', @@ -1174,6 +1175,11 @@ def test_family_metadata(self): self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), {'rows': []}) + # Test access with no analyst group + response = self.check_no_analyst_no_access(all_projects_url, has_override=True) + self.assertListEqual( + sorted([r['familyGuid'] for r in response.json()['rows']]), expected_families + self.ADDITIONAL_FAMILIES) + def test_variant_metadata(self): url = reverse(variant_metadata, args=[PROJECT_GUID]) self.check_analyst_login(url) @@ -1315,7 +1321,6 @@ def test_variant_metadata(self): self.assertEqual(response.status_code, 200) response_json = response.json() self.assertListEqual(list(response_json.keys()), ['rows']) - row_ids += self.ADDITIONAL_FINDINGS self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) self.assertDictEqual(response_json['rows'][1], expected_row) self.assertDictEqual(response_json['rows'][2], expected_mnv) @@ -1326,6 +1331,11 @@ def test_variant_metadata(self): self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), {'rows': []}) + # Test access with no analyst group + response = 
self.check_no_analyst_no_access(all_projects_url, has_override=True) + row_ids += self.ADDITIONAL_FINDINGS + self.assertListEqual([r['genetic_findings_id'] for r in response.json()['rows']], row_ids) + class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index d34e9fb1e1..050c3d3174 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -229,13 +229,18 @@ def get_initial_page_window(self, key, response): def get_initial_page_json(self, response): return self.get_initial_page_window('initialJSON', response) - def check_no_analyst_no_access(self, url, get_response=None): + def check_no_analyst_no_access(self, url, get_response=None, has_override=False): self.mock_analyst_group.__str__.return_value = '' response = get_response() if get_response else self.client.get(url) self.assertEqual(response.status_code, 403) self.assertEqual(response.json()['error'], 'Permission Denied') + self.login_pm_user() + response = get_response() if get_response else self.client.get(url) + self.assertEqual(response.status_code, 200 if has_override else 403) + return response + def reset_logs(self): self._log_stream.truncate(0) self._log_stream.seek(0) From e5139fe9558e56dc8604677681d4b9c35998431f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 15 May 2024 12:47:19 -0400 Subject: [PATCH 125/736] fix unit tests --- seqr/views/apis/report_api_tests.py | 20 +++++++++++--------- seqr/views/utils/test_utils.py | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 0865d26748..6e99831378 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -648,7 +648,7 @@ def test_seqr_stats(self): self.assertDictEqual(response_json['familiesCount'], self.STATS_DATA['familiesCount']) 
self.assertDictEqual(response_json['sampleCountsByType'], self.STATS_DATA['sampleCountsByType']) - self.check_no_analyst_no_access(url, has_override=True) + self.check_no_analyst_no_access(url, has_override=self.HAS_PM_OVERRIDE) @mock.patch('seqr.views.utils.export_utils.zipfile.ZipFile') @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated') @@ -1176,9 +1176,10 @@ def test_family_metadata(self): self.assertDictEqual(response.json(), {'rows': []}) # Test access with no analyst group - response = self.check_no_analyst_no_access(all_projects_url, has_override=True) - self.assertListEqual( - sorted([r['familyGuid'] for r in response.json()['rows']]), expected_families + self.ADDITIONAL_FAMILIES) + response = self.check_no_analyst_no_access(all_projects_url, has_override=self.HAS_PM_OVERRIDE) + if self.HAS_PM_OVERRIDE: + self.assertListEqual( + sorted([r['familyGuid'] for r in response.json()['rows']]), expected_families + self.ADDITIONAL_FAMILIES) def test_variant_metadata(self): url = reverse(variant_metadata, args=[PROJECT_GUID]) @@ -1332,15 +1333,17 @@ def test_variant_metadata(self): self.assertDictEqual(response.json(), {'rows': []}) # Test access with no analyst group - response = self.check_no_analyst_no_access(all_projects_url, has_override=True) - row_ids += self.ADDITIONAL_FINDINGS - self.assertListEqual([r['genetic_findings_id'] for r in response.json()['rows']], row_ids) + response = self.check_no_analyst_no_access(all_projects_url, has_override=self.HAS_PM_OVERRIDE) + if self.HAS_PM_OVERRIDE: + row_ids += self.ADDITIONAL_FINDINGS + self.assertListEqual([r['genetic_findings_id'] for r in response.json()['rows']], row_ids) class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] ADDITIONAL_FAMILIES = ['F000014_14'] ADDITIONAL_FINDINGS = ['NA21234_1_248367227'] + HAS_PM_OVERRIDE = True STATS_DATA = { 'projectsCount': {'non_demo': 3, 'demo': 1}, 'familiesCount': 
{'non_demo': 12, 'demo': 2}, @@ -1357,8 +1360,7 @@ class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): class AnvilReportAPITest(AnvilAuthenticationTestCase, ReportAPITest): fixtures = ['users', 'social_auth', '1kg_project', 'reference_data', 'report_variants'] - ADDITIONAL_FAMILIES = [] - ADDITIONAL_FINDINGS = [] + HAS_PM_OVERRIDE = False STATS_DATA = { 'projectsCount': {'internal': 1, 'external': 1, 'no_anvil': 1, 'demo': 1}, 'familiesCount': {'internal': 11, 'external': 1, 'no_anvil': 0, 'demo': 2}, diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 050c3d3174..64809b5991 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -236,7 +236,7 @@ def check_no_analyst_no_access(self, url, get_response=None, has_override=False) self.assertEqual(response.status_code, 403) self.assertEqual(response.json()['error'], 'Permission Denied') - self.login_pm_user() + self.client.force_login(self.super_user) response = get_response() if get_response else self.client.get(url) self.assertEqual(response.status_code, 200 if has_override else 403) return response From fc9602d7fcd7fd80cc4da09a72e849f8acf26082 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 15 May 2024 13:38:49 -0400 Subject: [PATCH 126/736] codacy fix --- seqr/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index 5866c9c592..bee93f098c 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -74,8 +74,8 @@ class Meta: internal_json_fields = [] audit_fields = set() - def _format_guid(self, id): - return f'{self.GUID_PREFIX}{id:0{self.GUID_PRECISION}d}_{_slugify(str(self))}'[:self.MAX_GUID_SIZE] + def _format_guid(self, model_id): + return f'{self.GUID_PREFIX}{model_id:0{self.GUID_PRECISION}d}_{_slugify(str(self))}'[:self.MAX_GUID_SIZE] def _compute_guid(self): return self._format_guid(self.id) From 585019e01b8e58974b3ac880bc033967d7cc3953 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: 
Wed, 15 May 2024 13:41:38 -0400 Subject: [PATCH 127/736] update MMe models --- matchmaker/models.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/matchmaker/models.py b/matchmaker/models.py index 2e2b43371f..36c676069f 100644 --- a/matchmaker/models.py +++ b/matchmaker/models.py @@ -24,8 +24,7 @@ class MatchmakerSubmission(ModelWithGUID): def __unicode__(self): return '{}_submission_{}'.format(str(self.individual), self.id) - def _compute_guid(self): - return 'MS%07d_%s' % (self.id, str(self.individual)) + GUID_PREFIX = 'MS' class Meta: json_fields = [ @@ -46,8 +45,7 @@ class MatchmakerIncomingQuery(ModelWithGUID): def __unicode__(self): return '{}_{}_query'.format(self.patient_id or self.id, self.institution) - def _compute_guid(self): - return 'MIQ%07d_%s_%s' % (self.id, self.patient_id, self.institution.replace(' ', '_')) + GUID_PREFIX = 'MIQ' class Meta: json_fields = ['guid', 'created_date'] @@ -71,8 +69,7 @@ class MatchmakerResult(ModelWithGUID): def __unicode__(self): return '{}_{}_result'.format(self.id, str(self.submission)) - def _compute_guid(self): - return 'MR%07d_%s' % (self.id, str(self.submission)) + GUID_PREFIX = 'MR' class Meta: json_fields = [ @@ -88,8 +85,7 @@ class MatchmakerContactNotes(ModelWithGUID): def __unicode__(self): return '{}_{}_contact'.format(self.id, self.institution) - def _compute_guid(self): - return 'MCN%07d_%s' % (self.id, self.institution.replace(' ', '_')) + GUID_PREFIX = 'MCN' class Meta: json_fields = [] From 5ec5487ce39c9c4f3a1f3a2595a74638e460973c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 15 May 2024 16:50:43 -0400 Subject: [PATCH 128/736] clinvar override pass filter --- hail_search/queries/base.py | 24 ++++++++++++++---------- hail_search/queries/mito.py | 4 ++-- hail_search/queries/sv.py | 4 ++-- hail_search/test_search.py | 5 +++-- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 
0b7dffc171..efacff45bd 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -384,11 +384,7 @@ def _filter_entries_table(self, ht, sample_data, inheritance_filter=None, qualit ht, sorted_family_sample_data = self._add_entry_sample_families(ht, sample_data) - quality_filter = quality_filter or {} - if quality_filter.get('vcf_filter'): - ht = self._filter_vcf_filters(ht) - - passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht=ht, **kwargs) + passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht, **kwargs) if passes_quality_filter is not None: ht = ht.annotate(family_entries=ht.family_entries.map( lambda entries: hl.or_missing(passes_quality_filter(entries), entries) @@ -538,7 +534,9 @@ def _valid_genotype_family_entries(cls, entries, gentoype_entry_indices, genotyp is_valid &= unaffected_filter return hl.or_missing(is_valid, entries) - def _get_family_passes_quality_filter(self, quality_filter, **kwargs): + def _get_family_passes_quality_filter(self, quality_filter, ht, **kwargs): + quality_filter = quality_filter or {} + affected_only = quality_filter.get('affected_only') passes_quality_filters = [] for filter_k, value in quality_filter.items(): @@ -547,10 +545,16 @@ def _get_family_passes_quality_filter(self, quality_filter, **kwargs): if field and value: passes_quality_filters.append(self._get_genotype_passes_quality_field(field, value, affected_only)) - if not passes_quality_filters: + has_vcf_filter = quality_filter.get('vcf_filter') + if not (passes_quality_filters or has_vcf_filter): return None - return lambda entries: entries.all(lambda gt: hl.all([f(gt) for f in passes_quality_filters])) + def passes_quality(entries): + passes_filters = entries.all(lambda gt: hl.all([f(gt) for f in passes_quality_filters])) if passes_quality_filters else True + passes_vcf_filters = self._passes_vcf_filters(ht) if has_vcf_filter else True + return passes_filters & passes_vcf_filters + + return 
passes_quality @classmethod def _get_genotype_passes_quality_field(cls, field, value, affected_only): @@ -569,8 +573,8 @@ def passes_quality_field(gt): return passes_quality_field @staticmethod - def _filter_vcf_filters(ht): - return ht.filter(hl.is_missing(ht.filters) | (ht.filters.length() < 1)) + def _passes_vcf_filters(ht): + return hl.is_missing(ht.filters) | (ht.filters.length() < 1) def _parse_variant_keys(self, variant_keys=None, **kwargs): return [hl.struct(**{self.KEY_FIELD[0]: key}) for key in (variant_keys or [])] diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index e7eaf0bdc3..8b17734dd3 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -147,8 +147,8 @@ def _parse_intervals(self, intervals, exclude_intervals=False, **kwargs): self._load_table_kwargs = {'_intervals': parsed_intervals, '_filter_intervals': True} return parsed_intervals - def _get_family_passes_quality_filter(self, quality_filter, ht=None, pathogenicity=None, **kwargs): - passes_quality = super()._get_family_passes_quality_filter(quality_filter) + def _get_family_passes_quality_filter(self, quality_filter, ht, pathogenicity=None, **kwargs): + passes_quality = super()._get_family_passes_quality_filter(quality_filter, ht) clinvar_path_ht = False if passes_quality is None else self._get_loaded_clinvar_prefilter_ht(pathogenicity) if not clinvar_path_ht: return passes_quality diff --git a/hail_search/queries/sv.py b/hail_search/queries/sv.py index e5f279e4d4..812108a6eb 100644 --- a/hail_search/queries/sv.py +++ b/hail_search/queries/sv.py @@ -85,8 +85,8 @@ def _parse_annotations(self, annotations, *args, **kwargs): parsed_annotations[NEW_SV_FIELD] = (annotations or {}).get(NEW_SV_FIELD) return parsed_annotations - def _get_family_passes_quality_filter(self, quality_filter, parsed_annotations=None, **kwargs): - passes_quality = super()._get_family_passes_quality_filter(quality_filter) + def _get_family_passes_quality_filter(self, 
quality_filter, ht, parsed_annotations=None, **kwargs): + passes_quality = super()._get_family_passes_quality_filter(quality_filter, ht) if not (parsed_annotations or {}).get(NEW_SV_FIELD): return passes_quality diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 8890d40ab1..cc1586df32 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -577,15 +577,16 @@ async def test_quality_filter(self): omit_sample_type='SV_WES', ) - quality_filter = {'min_gq': 40, 'min_ab': 50} + quality_filter.update({'min_gq': 40, 'min_ab': 50}) await self._assert_expected_search( [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES', ) annotations = {'splice_ai': '0.0'} # Ensures no variants are filtered out by annotation/path filters await self._assert_expected_search( - [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES', + [VARIANT1, VARIANT2, FAMILY_3_VARIANT, MITO_VARIANT1, MITO_VARIANT3], quality_filter=quality_filter, omit_sample_type='SV_WES', annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']}, + sample_data={**EXPECTED_SAMPLE_DATA, **FAMILY_2_MITO_SAMPLE_DATA}, ) await self._assert_expected_search( From f6263fd242c6bec6fbce0bb57d25db7cc967d602 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 11:07:00 -0400 Subject: [PATCH 129/736] add external_data field --- CHANGELOG.md | 1 + seqr/migrations/0065_family_external_data.py | 19 +++++++++++++++++++ seqr/models.py | 17 ++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 seqr/migrations/0065_family_external_data.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ed1b4522e3..bd225f41f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Adds external_data to Family model (REQUIRES DB MIGRATION) * Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION) * 
Enable "Reports" tab by default for local installations diff --git a/seqr/migrations/0065_family_external_data.py b/seqr/migrations/0065_family_external_data.py new file mode 100644 index 0000000000..2eac8921bf --- /dev/null +++ b/seqr/migrations/0065_family_external_data.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.23 on 2024-05-16 15:05 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0064_alter_phenotypeprioritization'), + ] + + operations = [ + migrations.AddField( + model_name='family', + name='external_data', + field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, choices=[('M', 'Methylation'), ('P', 'PacBio lrGS'), ('R', 'PacBio RNA'), ('L', 'ONT lrGS'), ('O', 'ONT RNA'), ('B', 'BioNano')], max_length=1, null=True), default=list, size=None), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index e9ec7268eb..6642cef5c6 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -315,6 +315,14 @@ class Family(ModelWithGUID): ('D', 'Data Sharing'), ('O', 'Other'), ) + EXTERNAL_DATA_CHOICES = ( + ('M', 'Methylation'), + ('P', 'PacBio lrGS'), + ('R', 'PacBio RNA'), + ('L', 'ONT lrGS'), + ('O', 'ONT RNA'), + ('B', 'BioNano'), + ) project = models.ForeignKey('Project', on_delete=models.PROTECT) @@ -338,6 +346,13 @@ class Family(ModelWithGUID): ), default=list) success_story = models.TextField(null=True, blank=True) + external_data = ArrayField(models.CharField( + max_length=1, + choices=EXTERNAL_DATA_CHOICES, + null=True, + blank=True + ), default=list) + coded_phenotype = models.TextField(null=True, blank=True) mondo_id = models.CharField(null=True, blank=True, max_length=30) post_discovery_omim_numbers = ArrayField(models.PositiveIntegerField(), default=list) @@ -366,7 +381,7 @@ class Meta: 'post_discovery_omim_numbers', 'pedigree_dataset', 'coded_phenotype', 'mondo_id', ] internal_json_fields = [ - 'success_story_types', 
'success_story', 'pubmed_ids', + 'success_story_types', 'success_story', 'pubmed_ids', 'external_data', ] audit_fields = {'analysis_status'} From 1c096aa4330bb45b67d16087fb8aff334bcf1a06 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 12:36:41 -0400 Subject: [PATCH 130/736] display external data --- ui/shared/components/panel/family/Family.jsx | 13 ++++++++++--- .../panel/view-fields/TagFieldView.jsx | 12 +++++++----- ui/shared/utils/constants.js | 16 ++++++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/ui/shared/components/panel/family/Family.jsx b/ui/shared/components/panel/family/Family.jsx index 077fc6a277..e0ad7c88e8 100644 --- a/ui/shared/components/panel/family/Family.jsx +++ b/ui/shared/components/panel/family/Family.jsx @@ -12,7 +12,7 @@ import OptionFieldView from '../view-fields/OptionFieldView' import ListFieldView from '../view-fields/ListFieldView' import NoteListFieldView from '../view-fields/NoteListFieldView' import SingleFieldView from '../view-fields/SingleFieldView' -import TagFieldView from '../view-fields/TagFieldView' +import TagFieldView, { TagFieldDisplay } from '../view-fields/TagFieldView' import TextFieldView from '../view-fields/TextFieldView' import { InlineHeader } from '../../StyledComponents' import { @@ -28,8 +28,8 @@ import { FAMILY_FIELD_NAME_LOOKUP, FAMILY_FIELD_OMIM_NUMBERS, FAMILY_FIELD_PMIDS, FAMILY_FIELD_DESCRIPTION, FAMILY_FIELD_SUCCESS_STORY, FAMILY_NOTES_FIELDS, - FAMILY_FIELD_CODED_PHENOTYPE, FAMILY_FIELD_INTERNAL_NOTES, FAMILY_FIELD_INTERNAL_SUMMARY, - FAMILY_FIELD_ANALYSIS_GROUPS, FAMILY_FIELD_MONDO_ID, + FAMILY_FIELD_CODED_PHENOTYPE, FAMILY_FIELD_INTERNAL_NOTES, FAMILY_FIELD_INTERNAL_SUMMARY, FAMILY_EXTERNAL_DATA_LOOKUP, + FAMILY_FIELD_ANALYSIS_GROUPS, FAMILY_FIELD_MONDO_ID, FAMILY_FIELD_EXTERNAL_DATA, FAMILY_EXTERNAL_DATA_OPTIONS, } from '../../../utils/constants' import { FirstSample, AnalystEmailDropdown, AnalysedBy, AnalysisGroups, analysisStatusIcon } from 
'./FamilyFields' import FamilyLayout from './FamilyLayout' @@ -89,6 +89,13 @@ const FAMILY_FIELD_RENDER_LOOKUP = { ), }, + [FAMILY_FIELD_EXTERNAL_DATA]: { + internal: true, + component: TagFieldView, + tagOptions: FAMILY_EXTERNAL_DATA_OPTIONS, + simplifiedValue: true, + fieldDisplay: value => , + }, [FAMILY_FIELD_SUCCESS_STORY_TYPE]: { internal: true, component: TagFieldView, diff --git a/ui/shared/components/panel/view-fields/TagFieldView.jsx b/ui/shared/components/panel/view-fields/TagFieldView.jsx index d8aa878887..9b40e354a5 100644 --- a/ui/shared/components/panel/view-fields/TagFieldView.jsx +++ b/ui/shared/components/panel/view-fields/TagFieldView.jsx @@ -74,11 +74,12 @@ MetadataField.propTypes = { error: PropTypes.bool, } -export const TagFieldDisplay = React.memo(( - { displayFieldValues, tagAnnotation, popup, displayAnnotationFirst, displayMetadata, linkTagType, tagLinkUrl }, -) => ( +export const TagFieldDisplay = React.memo(({ + displayFieldValues, tagAnnotation, popup, displayAnnotationFirst, displayMetadata, linkTagType, tagLinkUrl, tagLookup, +}) => ( - {displayFieldValues.map((tag) => { + {displayFieldValues.map((initialTag) => { + const tag = tagLookup ? 
tagLookup[initialTag] : initialTag let content = tag.name || tag.text if (displayMetadata && tag.metadata) { content = `${content}: ${tag.metadata}` @@ -100,13 +101,14 @@ export const TagFieldDisplay = React.memo(( )) TagFieldDisplay.propTypes = { - displayFieldValues: PropTypes.arrayOf(PropTypes.object).isRequired, + displayFieldValues: PropTypes.arrayOf(PropTypes.oneOfType(PropTypes.object, PropTypes.string)).isRequired, popup: PropTypes.func, tagAnnotation: PropTypes.func, displayAnnotationFirst: PropTypes.bool, displayMetadata: PropTypes.bool, linkTagType: PropTypes.string, tagLinkUrl: PropTypes.string, + tagLookup: PropTypes.object, } class TagFieldView extends React.PureComponent { diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 78d74f7ebd..5cf877a2b1 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -199,6 +199,19 @@ export const FAMILY_ANALYSED_BY_DATA_TYPES = [ ['STR', 'STR'], ] +export const FAMILY_EXTERNAL_DATA_OPTIONS = [ + { value: 'M', color: '#3c9f6d', name: 'Methylation' }, + { value: 'P', color: '#1135cc', name: 'PacBio lrGS' }, + { value: 'R', color: '#5c2672', name: 'PacBio RNA' }, + { value: 'L', color: '#6583EC', name: 'ONT lrGS' }, + { value: 'O', color: '#644e96', name: 'ONT RNA' }, + { value: 'B', color: '#d0672d', name: 'BioNano' }, +] + +export const FAMILY_EXTERNAL_DATA_LOOKUP = FAMILY_EXTERNAL_DATA_OPTIONS.reduce( + (acc, tag) => ({ [tag.value]: tag, ...acc }), {}, +) + // SUCCESS STORY const FAMILY_SUCCESS_STORY_NOVEL_DISCOVERY = 'N' @@ -252,6 +265,7 @@ export const FAMILY_FIELD_PEDIGREE = 'pedigreeImage' export const FAMILY_FIELD_CREATED_DATE = 'createdDate' export const FAMILY_FIELD_ANALYSIS_GROUPS = 'analysisGroups' export const FAMILY_FIELD_SAVED_VARIANTS = 'savedVariants' +export const FAMILY_FIELD_EXTERNAL_DATA = 'externalData' export const FAMILY_FIELD_NAME_LOOKUP = { [FAMILY_FIELD_DESCRIPTION]: 'Family Description', @@ -260,6 +274,7 @@ export const 
FAMILY_FIELD_NAME_LOOKUP = { [FAMILY_FIELD_ASSIGNED_ANALYST]: 'Assigned Analyst', [FAMILY_FIELD_ANALYSED_BY]: 'Analysed By', [FAMILY_FIELD_SUCCESS_STORY_TYPE]: 'Success Story Type', + [FAMILY_FIELD_EXTERNAL_DATA]: 'External Data', // TODO [FAMILY_FIELD_SUCCESS_STORY]: 'Success Story', [FAMILY_FIELD_FIRST_SAMPLE]: 'Data Loaded?', [FAMILY_FIELD_CASE_NOTES]: 'Case Notes', @@ -290,6 +305,7 @@ export const FAMILY_MAIN_FIELDS = [ export const FAMILY_DETAIL_FIELDS = [ ...FAMILY_MAIN_FIELDS, { id: FAMILY_FIELD_ANALYSED_BY }, + { id: FAMILY_FIELD_EXTERNAL_DATA }, { id: FAMILY_FIELD_SUCCESS_STORY_TYPE }, { id: FAMILY_FIELD_SUCCESS_STORY }, ...FAMILY_NOTES_FIELDS, From 0399c855393113aabc102f88614db4474823826f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 12:50:29 -0400 Subject: [PATCH 131/736] clean up family field name lookup --- ui/shared/utils/constants.js | 37 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 5cf877a2b1..992f1aef7d 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -13,7 +13,7 @@ import { BaseSemanticInput, } from '../components/form/Inputs' -import { stripMarkdown, snakecaseToTitlecase } from './stringUtils' +import { stripMarkdown, snakecaseToTitlecase, camelcaseToTitlecase } from './stringUtils' import { ColoredIcon } from '../components/StyledComponents' import HpoPanel from '../components/panel/HpoPanel' @@ -267,28 +267,6 @@ export const FAMILY_FIELD_ANALYSIS_GROUPS = 'analysisGroups' export const FAMILY_FIELD_SAVED_VARIANTS = 'savedVariants' export const FAMILY_FIELD_EXTERNAL_DATA = 'externalData' -export const FAMILY_FIELD_NAME_LOOKUP = { - [FAMILY_FIELD_DESCRIPTION]: 'Family Description', - [FAMILY_FIELD_ANALYSIS_GROUPS]: 'Analysis Groups', - [FAMILY_FIELD_ANALYSIS_STATUS]: 'Analysis Status', - [FAMILY_FIELD_ASSIGNED_ANALYST]: 'Assigned Analyst', - [FAMILY_FIELD_ANALYSED_BY]: 'Analysed 
By', - [FAMILY_FIELD_SUCCESS_STORY_TYPE]: 'Success Story Type', - [FAMILY_FIELD_EXTERNAL_DATA]: 'External Data', // TODO - [FAMILY_FIELD_SUCCESS_STORY]: 'Success Story', - [FAMILY_FIELD_FIRST_SAMPLE]: 'Data Loaded?', - [FAMILY_FIELD_CASE_NOTES]: 'Case Notes', - [FAMILY_FIELD_ANALYSIS_NOTES]: 'Analysis Notes', - [FAMILY_FIELD_MME_NOTES]: 'Matchmaker Notes', - [FAMILY_FIELD_CODED_PHENOTYPE]: 'Phenotype Description', - [FAMILY_FIELD_MONDO_ID]: 'MONDO ID', - [FAMILY_FIELD_OMIM_NUMBERS]: 'Post-discovery OMIM #', - [FAMILY_FIELD_PMIDS]: 'Publications on this discovery', - [FAMILY_FIELD_INTERNAL_NOTES]: 'Internal Notes', - [FAMILY_FIELD_INTERNAL_SUMMARY]: 'Internal Summary', - [FAMILY_FIELD_SAVED_VARIANTS]: 'Saved Variants', -} - export const FAMILY_NOTES_FIELDS = [ { id: FAMILY_FIELD_CASE_NOTES, noteType: 'C' }, { id: FAMILY_FIELD_ANALYSIS_NOTES, noteType: 'A' }, @@ -315,6 +293,19 @@ export const FAMILY_DETAIL_FIELDS = [ { id: FAMILY_FIELD_PMIDS }, ] +export const FAMILY_FIELD_NAME_LOOKUP = { + ...FAMILY_DETAIL_FIELDS.reduce((acc, field) => ({ ...acc, [field.id]: camelcaseToTitlecase(field.id) }), {}), + [FAMILY_FIELD_DESCRIPTION]: 'Family Description', + [FAMILY_FIELD_FIRST_SAMPLE]: 'Data Loaded?', + [FAMILY_FIELD_MME_NOTES]: 'Matchmaker Notes', + [FAMILY_FIELD_CODED_PHENOTYPE]: 'Phenotype Description', + [FAMILY_FIELD_MONDO_ID]: 'MONDO ID', + [FAMILY_FIELD_OMIM_NUMBERS]: 'Post-discovery OMIM #', + [FAMILY_FIELD_PMIDS]: 'Publications on this discovery', + [FAMILY_FIELD_INTERNAL_NOTES]: 'Internal Notes', + [FAMILY_FIELD_INTERNAL_SUMMARY]: 'Internal Summary', +} + const SHOW_DATA_LOADED = 'SHOW_DATA_LOADED' const SHOW_ASSIGNED_TO_ME = 'SHOW_ASSIGNED_TO_ME' const SHOW_ANALYSED_BY_ME = 'SHOW_ANALYSED_BY_ME' From 3480fe2c2ca2c22c8ffc72020667b4cfd68fd2b9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 12:56:51 -0400 Subject: [PATCH 132/736] updat eunit tests --- seqr/fixtures/1kg_project.json | 1 + seqr/views/utils/test_utils.py | 2 +- 2 files changed, 2 
insertions(+), 1 deletion(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 0b959bca28..1b317dbf0a 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -130,6 +130,7 @@ "analysis_status": "Q", "coded_phenotype": "myopathy", "pubmed_ids": ["34415322", "33665635"], + "external_data": ["M"], "case_review_notes": "
initial notes with uniçøde
\n
test
", "case_review_summary": "
internal case review summary with uniçøde
" } diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 64809b5991..4110cd4f4b 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -746,7 +746,7 @@ def _get_list_param(call, param): 'caseReviewNotes', 'caseReviewSummary' } INTERNAL_FAMILY_FIELDS = { - 'individualGuids', 'successStory', 'successStoryTypes', 'pubmedIds', + 'individualGuids', 'successStory', 'successStoryTypes', 'pubmedIds', 'externalData', } INTERNAL_FAMILY_FIELDS.update(FAMILY_FIELDS) From 8067e8d26993b4dd48e336c13740061c8a6b3941 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 15:04:34 -0400 Subject: [PATCH 133/736] reduce project failies summary response --- seqr/views/apis/project_api.py | 23 ++++++++++++++--------- seqr/views/utils/orm_to_json_utils.py | 5 ++++- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 6bb30121ae..501e48b6f3 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -6,7 +6,7 @@ from collections import defaultdict from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import PermissionDenied -from django.db.models import Count, Max, Q, Case, When, Value +from django.db.models import Count, Max, Q, F, Case, When, Value from django.db.models.functions import JSONObject, TruncDate from django.utils import timezone from notifications.models import Notification @@ -15,11 +15,12 @@ from seqr.models import Project, Family, Individual, Sample, FamilyNote, CAN_EDIT from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE from seqr.views.utils.individual_utils import delete_individuals -from seqr.views.utils.json_utils import create_json_response, _to_snake_case +from seqr.views.utils.json_utils import create_json_response, _to_snake_case, _to_camel_case from seqr.views.utils.json_to_orm_utils import update_project_from_json, 
create_model_from_json, update_model_from_json from seqr.views.utils.orm_to_json_utils import _get_json_for_project, get_json_for_samples, \ - get_json_for_project_collaborator_list, get_json_for_matchmaker_submissions, _get_json_for_families, \ - get_json_for_family_notes, _get_json_for_individuals, get_json_for_project_collaborator_groups + get_json_for_project_collaborator_list, get_json_for_matchmaker_submissions, \ + get_json_for_family_notes, _get_json_for_individuals, get_json_for_project_collaborator_groups, \ + FAMILY_ADDITIONAL_VALUES, INDIVIDUAL_GUIDS_VALUES from seqr.views.utils.permissions_utils import get_project_and_check_permissions, check_project_permissions, \ check_user_created_object_permissions, pm_required, user_is_pm, login_and_policies_required, \ has_workspace_perm, has_case_review_permissions, is_internal_anvil_project @@ -192,7 +193,15 @@ def project_families(request, project_guid): individual__phenotypeprioritization__tool__isnull=False, )) ) - family_annotations = dict( + families = family_models.values( + 'description', + **{_to_camel_case(field): F(field) for field in [ + 'family_id', 'analysis_status', 'created_date', 'coded_phenotype', 'mondo_id', + ]}, + familyGuid=F('guid'), + projectGuid=Value(project_guid), + **FAMILY_ADDITIONAL_VALUES, + **INDIVIDUAL_GUIDS_VALUES, caseReviewStatuses=ArrayAgg('individual__case_review_status', distinct=True, filter=~Q(individual__case_review_status='')), caseReviewStatusLastModified=Max('individual__case_review_status_last_modified_date'), hasRequiredMetadata=Case(When(metadata_individual_count__gt=0, then=Value(True)), default=Value(False)), @@ -202,10 +211,6 @@ def project_families(request, project_guid): ), hasPhenotypePrioritization=Case(When(pp_individual_count__gt=0, then=Value(True)), default=Value(False)), ) - families = _get_json_for_families( - family_models, request.user, has_case_review_perm=has_case_review_permissions(project, request.user), - project_guid=project_guid, 
add_individual_guids_field=True, additional_values=family_annotations, - ) response = families_discovery_tags(families) return create_json_response(response) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index fd6d277917..daaa38d98a 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -228,6 +228,9 @@ def _get_case_review_fields(model_cls, has_case_review_perm): ), 'displayName': FAMILY_DISPLAY_NAME_EXPR, } +INDIVIDUAL_GUIDS_VALUES = { + 'individualGuids': ArrayAgg('individual__guid', filter=Q(individual__isnull=False), distinct=True), +} def _get_json_for_families(families, user=None, add_individual_guids_field=False, project_guid=None, is_analyst=None, @@ -240,7 +243,7 @@ def _get_json_for_families(families, user=None, add_individual_guids_field=False if additional_values: family_additional_values.update(additional_values) if add_individual_guids_field: - family_additional_values['individualGuids'] = ArrayAgg('individual__guid', filter=Q(individual__isnull=False), distinct=True) + family_additional_values.update(INDIVIDUAL_GUIDS_VALUES) additional_model_fields = _get_case_review_fields(families.model, has_case_review_perm) nested_fields = [{'fields': ('project', 'guid'), 'value': project_guid}] From b4bda16dfd19f4ab1468670c53ba25d68b0cb99e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 16:41:27 -0400 Subject: [PATCH 134/736] updat etests --- seqr/views/apis/project_api_tests.py | 22 +++------------------- seqr/views/utils/test_utils.py | 8 ++++++-- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/seqr/views/apis/project_api_tests.py b/seqr/views/apis/project_api_tests.py index 8e0fdc388d..3f07dd8526 100644 --- a/seqr/views/apis/project_api_tests.py +++ b/seqr/views/apis/project_api_tests.py @@ -14,8 +14,8 @@ from seqr.views.utils.terra_api_utils import TerraAPIException, TerraRefreshTokenFailedException from seqr.views.utils.test_utils import 
AuthenticationTestCase, AnvilAuthenticationTestCase, \ PROJECT_FIELDS, LOCUS_LIST_FIELDS, PA_LOCUS_LIST_FIELDS, NO_INTERNAL_CASE_REVIEW_INDIVIDUAL_FIELDS, \ - SAMPLE_FIELDS, FAMILY_FIELDS, INTERNAL_FAMILY_FIELDS, INTERNAL_INDIVIDUAL_FIELDS, INDIVIDUAL_FIELDS, TAG_TYPE_FIELDS, \ - CASE_REVIEW_FAMILY_FIELDS, FAMILY_NOTE_FIELDS, MATCHMAKER_SUBMISSION_FIELDS, ANALYSIS_GROUP_FIELDS, \ + SAMPLE_FIELDS, SUMMARY_FAMILY_FIELDS, INTERNAL_INDIVIDUAL_FIELDS, INDIVIDUAL_FIELDS, TAG_TYPE_FIELDS, \ + FAMILY_NOTE_FIELDS, MATCHMAKER_SUBMISSION_FIELDS, ANALYSIS_GROUP_FIELDS, \ EXT_WORKSPACE_NAMESPACE, EXT_WORKSPACE_NAME, DYNAMIC_ANALYSIS_GROUP_FIELDS PROJECT_GUID = 'R0001_1kg' @@ -369,7 +369,7 @@ def test_project_families(self): 'individualGuids', 'discoveryTags', 'caseReviewStatuses', 'caseReviewStatusLastModified', 'hasRequiredMetadata', 'parents', 'hasPhenotypePrioritization', } - family_fields.update(FAMILY_FIELDS) + family_fields.update(SUMMARY_FAMILY_FIELDS) self.assertSetEqual(set(family_1.keys()), family_fields) self.assertEqual(len(family_1['individualGuids']), 3) @@ -401,22 +401,6 @@ def test_project_families(self): empty_url = reverse(project_families, args=[EMPTY_PROJECT_GUID]) self._check_empty_project(empty_url, response_keys) - # Test analyst users have internal fields returned - self.login_analyst_user() - response = self.client.get(url) - self.assertEqual(response.status_code, 200) - - response_json = response.json() - family_fields.update(CASE_REVIEW_FAMILY_FIELDS) - internal_fields = deepcopy(family_fields) - internal_fields.update(INTERNAL_FAMILY_FIELDS) - self.assertSetEqual(set(next(iter(response_json['familiesByGuid'].values())).keys()), internal_fields) - - self.mock_analyst_group.__str__.return_value = '' - response = self.client.get(url) - self.assertEqual(response.status_code, 200) - self.assertSetEqual(set(next(iter(response.json()['familiesByGuid'].values())).keys()), family_fields) - def test_project_individuals(self): url = reverse(project_individuals, 
args=[PROJECT_GUID]) self.check_collaborator_login(url) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 64809b5991..50b5ec5cb5 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -737,11 +737,15 @@ def _get_list_param(call, param): ANALYSIS_GROUP_FIELDS = {'analysisGroupGuid', 'description', 'name', 'projectGuid', 'familyGuids'} DYNAMIC_ANALYSIS_GROUP_FIELDS = {'analysisGroupGuid', 'criteria', 'name', 'projectGuid'} +SUMMARY_FAMILY_FIELDS = { + 'projectGuid', 'familyGuid', 'analysedBy', 'familyId', 'displayName', 'description', + 'analysisStatus', 'createdDate', 'assignedAnalyst', 'codedPhenotype', 'mondoId', +} FAMILY_FIELDS = { - 'projectGuid', 'familyGuid', 'analysedBy', 'pedigreeImage', 'familyId', 'displayName', 'description', - 'analysisStatus', 'pedigreeImage', 'createdDate', 'assignedAnalyst', 'codedPhenotype', 'postDiscoveryOmimNumbers', + 'pedigreeImage', 'postDiscoveryOmimNumbers', 'pedigreeDataset', 'analysisStatusLastModifiedDate', 'analysisStatusLastModifiedBy', 'mondoId', } +FAMILY_FIELDS.update(SUMMARY_FAMILY_FIELDS) CASE_REVIEW_FAMILY_FIELDS = { 'caseReviewNotes', 'caseReviewSummary' } From 8672039e5a9964f99b9a9920b373b28cfeee0503 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 17:15:21 -0400 Subject: [PATCH 135/736] more efficient query for phentoype pripritization --- seqr/views/apis/project_api.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 6bb30121ae..5c97db17cc 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -6,13 +6,13 @@ from collections import defaultdict from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import PermissionDenied -from django.db.models import Count, Max, Q, Case, When, Value +from django.db.models import Count, Max, Q, F, Case, When, Value from django.db.models.functions 
import JSONObject, TruncDate from django.utils import timezone from notifications.models import Notification from matchmaker.models import MatchmakerSubmission -from seqr.models import Project, Family, Individual, Sample, FamilyNote, CAN_EDIT +from seqr.models import Project, Family, Individual, Sample, FamilyNote, PhenotypePrioritization, CAN_EDIT from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE from seqr.views.utils.individual_utils import delete_individuals from seqr.views.utils.json_utils import create_json_response, _to_snake_case @@ -188,11 +188,9 @@ def project_families(request, project_guid): individual__features__0__isnull=False, individual__birth_year__isnull=False, individual__population__isnull=False, individual__proband_relationship__isnull=False, )), - pp_individual_count=Count('individual', filter=Q( - individual__phenotypeprioritization__tool__isnull=False, - )) ) family_annotations = dict( + _id=F('id'), caseReviewStatuses=ArrayAgg('individual__case_review_status', distinct=True, filter=~Q(individual__case_review_status='')), caseReviewStatusLastModified=Max('individual__case_review_status_last_modified_date'), hasRequiredMetadata=Case(When(metadata_individual_count__gt=0, then=Value(True)), default=Value(False)), @@ -200,12 +198,15 @@ def project_families(request, project_guid): JSONObject(paternalGuid='individual__father__guid', maternalGuid='individual__mother__guid'), filter=Q(individual__mother__isnull=False) | Q(individual__father__isnull=False), distinct=True, ), - hasPhenotypePrioritization=Case(When(pp_individual_count__gt=0, then=Value(True)), default=Value(False)), ) families = _get_json_for_families( family_models, request.user, has_case_review_perm=has_case_review_permissions(project, request.user), project_guid=project_guid, add_individual_guids_field=True, additional_values=family_annotations, ) + phenotype_priority_family_ids = set(PhenotypePrioritization.objects.filter( + 
individual__family__project=project).values_list('individual__family', flat=True).distinct()) + for family in families: + family['hasPhenotypePrioritization'] = family.pop('_id') in phenotype_priority_family_ids response = families_discovery_tags(families) return create_json_response(response) From 9d67b1342f919a5bbe4ccd817a70c38b83df5b7e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 17:23:52 -0400 Subject: [PATCH 136/736] improve discovery tags query --- seqr/views/apis/project_api.py | 2 +- seqr/views/utils/project_context_utils.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 5c97db17cc..3757fd77c5 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -207,7 +207,7 @@ def project_families(request, project_guid): individual__family__project=project).values_list('individual__family', flat=True).distinct()) for family in families: family['hasPhenotypePrioritization'] = family.pop('_id') in phenotype_priority_family_ids - response = families_discovery_tags(families) + response = families_discovery_tags(families, project=project) return create_json_response(response) diff --git a/seqr/views/utils/project_context_utils.py b/seqr/views/utils/project_context_utils.py index e8fdea5c45..c4a684cfe3 100644 --- a/seqr/views/utils/project_context_utils.py +++ b/seqr/views/utils/project_context_utils.py @@ -110,11 +110,12 @@ def add_child_ids(response): family['individualGuids'] = individual_guids_by_family[family['familyGuid']] -def families_discovery_tags(families): +def families_discovery_tags(families, project=None): families_by_guid = {f['familyGuid']: dict(discoveryTags=[], **f) for f in families} + family_filter = {'family__project': project} if project else {'family__guid__in': families_by_guid.keys()} discovery_tags = get_json_for_saved_variants(SavedVariant.objects.filter( - family__guid__in=families_by_guid.keys(), 
varianttag__variant_tag_type__category='CMG Discovery Tags', + varianttag__variant_tag_type__category='CMG Discovery Tags', **family_filter, ), add_details=True) gene_ids = set() From f0dd965570ecf992386ced0c1b5ebb93b430dd02 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 17:26:22 -0400 Subject: [PATCH 137/736] remove unnecessary project tablejoin --- seqr/views/apis/project_api.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 3757fd77c5..e2231589dd 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -203,10 +203,11 @@ def project_families(request, project_guid): family_models, request.user, has_case_review_perm=has_case_review_permissions(project, request.user), project_guid=project_guid, add_individual_guids_field=True, additional_values=family_annotations, ) + families_by_id = {f.pop('_id'): f for f in families} phenotype_priority_family_ids = set(PhenotypePrioritization.objects.filter( - individual__family__project=project).values_list('individual__family', flat=True).distinct()) - for family in families: - family['hasPhenotypePrioritization'] = family.pop('_id') in phenotype_priority_family_ids + individual__family_id__in=families_by_id).values_list('individual__family', flat=True).distinct()) + for family_id, family in families_by_id.items(): + family['hasPhenotypePrioritization'] = family_id in phenotype_priority_family_ids response = families_discovery_tags(families, project=project) return create_json_response(response) From 9b7feefa3adda342a8bc613e69325df99e62e690 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 18:02:45 -0400 Subject: [PATCH 138/736] separate individual query --- seqr/views/apis/project_api.py | 40 ++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 
e2231589dd..8b41cfa78c 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -183,27 +183,39 @@ def project_page_data(request, project_guid): @login_and_policies_required def project_families(request, project_guid): project = get_project_and_check_permissions(project_guid, request.user) - family_models = Family.objects.filter(project=project).annotate( - metadata_individual_count=Count('individual', filter=Q( - individual__features__0__isnull=False, individual__birth_year__isnull=False, - individual__population__isnull=False, individual__proband_relationship__isnull=False, - )), - ) + family_models = Family.objects.filter(project=project) family_annotations = dict( _id=F('id'), - caseReviewStatuses=ArrayAgg('individual__case_review_status', distinct=True, filter=~Q(individual__case_review_status='')), - caseReviewStatusLastModified=Max('individual__case_review_status_last_modified_date'), - hasRequiredMetadata=Case(When(metadata_individual_count__gt=0, then=Value(True)), default=Value(False)), - parents=ArrayAgg( - JSONObject(paternalGuid='individual__father__guid', maternalGuid='individual__mother__guid'), - filter=Q(individual__mother__isnull=False) | Q(individual__father__isnull=False), distinct=True, - ), ) families = _get_json_for_families( family_models, request.user, has_case_review_perm=has_case_review_permissions(project, request.user), - project_guid=project_guid, add_individual_guids_field=True, additional_values=family_annotations, + project_guid=project_guid, add_individual_guids_field=False, additional_values=family_annotations, ) families_by_id = {f.pop('_id'): f for f in families} + # TODO multiple joins against individual table to get parent annotations + family_individuals = Individual.objects.filter(family_id__in=families_by_id).values('family_id').annotate( + caseReviewStatuses=ArrayAgg('case_review_status', distinct=True, filter=~Q(case_review_status='')), + 
caseReviewStatusLastModified=Max('case_review_status_last_modified_date'), + parental_ids=ArrayAgg(JSONObject(**{k: k for k in ['id', 'guid', 'father_id', 'mother_id']})), + metadata_count=Count('id', filter=Q( + features__0__isnull=False, birth_year__isnull=False, + population__isnull=False, proband_relationship__isnull=False, + )), + ) + for individual_agg in family_individuals: + family = families_by_id[individual_agg.pop('family_id')] + parental_ids = individual_agg.pop('parental_ids') + id_guid_map = {i['id']: i['guid'] for i in parental_ids} + family.update({ + 'individualGuids': sorted(id_guid_map.values()), + 'hasRequiredMetadata': individual_agg.pop('metadata_count') > 0, + 'parents': [ + {'paternalGuid': id_guid_map.get(p['father_id']), 'maternalGuid': id_guid_map.get(p['mother_id'])} + for p in parental_ids if p['father_id'] or p['mother_id'] + ], + **individual_agg, + }) + phenotype_priority_family_ids = set(PhenotypePrioritization.objects.filter( individual__family_id__in=families_by_id).values_list('individual__family', flat=True).distinct()) for family_id, family in families_by_id.items(): From 8822b5ef42183517f8c5146acd3b3141c24887cf Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 16 May 2024 18:21:34 -0400 Subject: [PATCH 139/736] better phenotype priority logic --- seqr/views/apis/project_api.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 8b41cfa78c..3ae01421a8 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -183,16 +183,16 @@ def project_page_data(request, project_guid): @login_and_policies_required def project_families(request, project_guid): project = get_project_and_check_permissions(project_guid, request.user) + family_models = Family.objects.filter(project=project) - family_annotations = dict( - _id=F('id'), - ) families = _get_json_for_families( family_models, request.user, 
has_case_review_perm=has_case_review_permissions(project, request.user), - project_guid=project_guid, add_individual_guids_field=False, additional_values=family_annotations, + project_guid=project_guid, add_individual_guids_field=False, additional_values={'_id': F('id')}, ) families_by_id = {f.pop('_id'): f for f in families} - # TODO multiple joins against individual table to get parent annotations + + phenotype_priority_families = set(PhenotypePrioritization.objects.filter( + individual__family_id__in=families_by_id).values_list('individual__family_id', flat=True).distinct()) family_individuals = Individual.objects.filter(family_id__in=families_by_id).values('family_id').annotate( caseReviewStatuses=ArrayAgg('case_review_status', distinct=True, filter=~Q(case_review_status='')), caseReviewStatusLastModified=Max('case_review_status_last_modified_date'), @@ -203,11 +203,12 @@ def project_families(request, project_guid): )), ) for individual_agg in family_individuals: - family = families_by_id[individual_agg.pop('family_id')] + family_id = individual_agg.pop('family_id') parental_ids = individual_agg.pop('parental_ids') id_guid_map = {i['id']: i['guid'] for i in parental_ids} - family.update({ + families_by_id[family_id].update({ 'individualGuids': sorted(id_guid_map.values()), + 'hasPhenotypePrioritization': family_id in phenotype_priority_families, 'hasRequiredMetadata': individual_agg.pop('metadata_count') > 0, 'parents': [ {'paternalGuid': id_guid_map.get(p['father_id']), 'maternalGuid': id_guid_map.get(p['mother_id'])} @@ -216,10 +217,6 @@ def project_families(request, project_guid): **individual_agg, }) - phenotype_priority_family_ids = set(PhenotypePrioritization.objects.filter( - individual__family_id__in=families_by_id).values_list('individual__family', flat=True).distinct()) - for family_id, family in families_by_id.items(): - family['hasPhenotypePrioritization'] = family_id in phenotype_priority_family_ids response = families_discovery_tags(families, 
project=project) return create_json_response(response) From 3f445b4a67e9f077e521ba05bd6b8b4be1132475 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 17 May 2024 13:49:24 -0400 Subject: [PATCH 140/736] allow callset file extension .bed.gz --- seqr/views/apis/data_manager_api.py | 2 +- seqr/views/apis/data_manager_api_tests.py | 2 +- seqr/views/apis/feature_updates_api.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index fa8c8bbca0..56ee2b285f 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -436,7 +436,7 @@ def write_pedigree(request, project_guid): DATA_TYPE_FILE_EXTS = { Sample.DATASET_TYPE_MITO_CALLS: ('.mt',), - Sample.DATASET_TYPE_SV_CALLS: ('.bed',), + Sample.DATASET_TYPE_SV_CALLS: ('.bed', '.bed.gz'), } LOADABLE_PDO_STATUSES = [ diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 9bb975e5a2..81e9607b31 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1398,7 +1398,7 @@ def test_validate_callset(self, mock_subprocess): response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 400) self.assertListEqual(response.json()['errors'], [ - 'Invalid VCF file format - file path must end with .bed or .vcf or .vcf.gz or .vcf.bgz', + 'Invalid VCF file format - file path must end with .bed or .bed.gz or .vcf or .vcf.gz or .vcf.bgz', ]) body['datasetType'] = 'MITO' diff --git a/seqr/views/apis/feature_updates_api.py b/seqr/views/apis/feature_updates_api.py index 2fccead30e..cdc91e706d 100644 --- a/seqr/views/apis/feature_updates_api.py +++ b/seqr/views/apis/feature_updates_api.py @@ -6,7 +6,7 @@ from seqr.views.utils.json_utils import create_json_response FEED_URL = ( - 
"https://github.com/broadinstitute/seqr/discussions/categories/feature-updates.atom" + "https://github.com/community/community/discussions.atom" ) TIMEOUT = 5 From 8a8e4962359051440c34838740f3a7bd7392c766 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 17 May 2024 13:50:08 -0400 Subject: [PATCH 141/736] not that --- seqr/views/apis/feature_updates_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/feature_updates_api.py b/seqr/views/apis/feature_updates_api.py index cdc91e706d..2fccead30e 100644 --- a/seqr/views/apis/feature_updates_api.py +++ b/seqr/views/apis/feature_updates_api.py @@ -6,7 +6,7 @@ from seqr.views.utils.json_utils import create_json_response FEED_URL = ( - "https://github.com/community/community/discussions.atom" + "https://github.com/broadinstitute/seqr/discussions/categories/feature-updates.atom" ) TIMEOUT = 5 From 84604621db3da256139076e06e76707c8167ab04 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 17 May 2024 14:21:48 -0400 Subject: [PATCH 142/736] optimiz adding project tags --- seqr/views/apis/family_api.py | 2 +- seqr/views/apis/individual_api.py | 5 +- seqr/views/apis/project_api.py | 6 +-- seqr/views/utils/project_context_utils.py | 61 ++++++++++++----------- 4 files changed, 38 insertions(+), 36 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 3921a5b889..a780b93d5f 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -139,7 +139,7 @@ def family_variant_tag_summary(request, family_guid): saved_variants__matchmakersubmissiongenes__isnull=False).values('saved_variants__guid').distinct().count() response['projectsByGuid'] = {project.guid: {}} - add_project_tag_types(response['projectsByGuid']) + add_project_tag_types(response['projectsByGuid'], project=project) return create_json_response(response) diff --git a/seqr/views/apis/individual_api.py b/seqr/views/apis/individual_api.py index ec74854e85..159291cb86 100644 
--- a/seqr/views/apis/individual_api.py +++ b/seqr/views/apis/individual_api.py @@ -24,7 +24,7 @@ from seqr.views.utils.permissions_utils import get_project_and_check_permissions, check_project_permissions, \ get_project_and_check_pm_permissions, login_and_policies_required, has_project_permissions, project_has_anvil, \ is_internal_anvil_project, pm_or_data_manager_required, check_workspace_perm -from seqr.views.utils.project_context_utils import add_project_tag_types +from seqr.views.utils.project_context_utils import add_project_tag_type_counts from seqr.views.utils.individual_utils import delete_individuals, add_or_update_individuals_and_families from seqr.views.utils.variant_utils import bulk_create_tagged_variants @@ -936,8 +936,7 @@ def import_gregor_metadata(request, project_guid): ) info.append(f'Loaded {num_new} new and {num_updated} updated findings tags') - response_json['projectsByGuid'] = {project_guid: {}} - response_json['familyTagTypeCounts'] = add_project_tag_types(response_json['projectsByGuid'], add_counts=True) + add_project_tag_type_counts(project, response_json) response_json['importStats'] = {'gregorMetadata': {'info': info, 'warnings': warnings}} return create_json_response(response_json) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 3ae01421a8..08ff2b93d9 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -24,7 +24,7 @@ check_user_created_object_permissions, pm_required, user_is_pm, login_and_policies_required, \ has_workspace_perm, has_case_review_permissions, is_internal_anvil_project from seqr.views.utils.project_context_utils import families_discovery_tags, \ - add_project_tag_types, get_project_analysis_groups, get_project_locus_lists + add_project_tag_type_counts, get_project_analysis_groups, get_project_locus_lists from seqr.views.utils.terra_api_utils import is_anvil_authenticated, anvil_enabled from settings import BASE_URL @@ -241,12 +241,12 @@ def 
project_overview(request, project_guid): s['familyCounts'] = {f: s['familyCounts'].count(f) for f in s['familyCounts']} grouped_sample_counts[f'{s.pop("sample_type")}__{s.pop("dataset_type")}'].append(s) + project_json = {'projectGuid': project_guid, 'sampleCounts': grouped_sample_counts} response = { - 'projectsByGuid': {project_guid: {'projectGuid': project_guid, 'sampleCounts': grouped_sample_counts}}, 'samplesByGuid': samples_by_guid, } - response['familyTagTypeCounts'] = add_project_tag_types(response['projectsByGuid'], add_counts=True) + add_project_tag_type_counts(project, response, project_json=project_json) project_mme_submissions = MatchmakerSubmission.objects.filter(individual__family__project=project) diff --git a/seqr/views/utils/project_context_utils.py b/seqr/views/utils/project_context_utils.py index c4a684cfe3..c737ea138e 100644 --- a/seqr/views/utils/project_context_utils.py +++ b/seqr/views/utils/project_context_utils.py @@ -1,10 +1,10 @@ from collections import defaultdict -from django.db.models import Count, Q, prefetch_related_objects +from django.db.models import Count, Q, F, prefetch_related_objects from seqr.models import Individual, IgvSample, AnalysisGroup, DynamicAnalysisGroup, LocusList, VariantTagType,\ VariantFunctionalData, FamilyNote, SavedVariant, VariantTag, VariantNote from seqr.utils.gene_utils import get_genes -from seqr.views.utils.orm_to_json_utils import _get_json_for_families, _get_json_for_individuals, _get_json_for_models, \ +from seqr.views.utils.orm_to_json_utils import _get_json_for_families, _get_json_for_individuals, get_json_for_queryset, \ get_json_for_analysis_groups, get_json_for_samples, get_json_for_locus_lists, \ get_json_for_family_notes, get_json_for_saved_variants @@ -133,20 +133,20 @@ def families_discovery_tags(families, project=None): MME_TAG_NAME = 'MME Submission' -def add_project_tag_types(projects_by_guid, add_counts=False): - variant_tag_types_models = 
VariantTagType.objects.filter(Q(project__guid__in=projects_by_guid.keys()) | Q(project__isnull=True)) - variant_tag_types = _get_json_for_models(variant_tag_types_models) +def add_project_tag_types(projects_by_guid, project=None): + is_single_project = len(projects_by_guid) == 1 + project_q = dict(project=project) if project else dict(project__guid__in=projects_by_guid.keys()) + variant_tag_types_models = VariantTagType.objects.filter(Q(**project_q) | Q(project__isnull=True)) + variant_tag_types = get_json_for_queryset( + variant_tag_types_models, nested_fields=None if is_single_project else [{'fields': ('project', 'guid')}]) project_tag_types = defaultdict(list) - if len(projects_by_guid) == 1: + if is_single_project: project_guid = next(iter((projects_by_guid.keys()))) - project_tag_types[project_guid] = variant_tag_types + project_tag_types[project_guid] = list(variant_tag_types) else: - prefetch_related_objects(variant_tag_types_models, 'project') - variant_tag_types_by_guid = {vtt['variantTagTypeGuid']: vtt for vtt in variant_tag_types} - for vtt in variant_tag_types_models: - project_guid = vtt.project.guid if vtt.project else None - project_tag_types[project_guid].append(variant_tag_types_by_guid[vtt.guid]) + for vtt in variant_tag_types: + project_tag_types[vtt.pop('projectGuid')].append(vtt) project_tag_types[None].append({ 'variantTagTypeGuid': 'mmeSubmissionVariants', @@ -157,7 +157,6 @@ def add_project_tag_types(projects_by_guid, add_counts=False): 'order': 99, }) - family_counts = {} for project_guid, project_json in projects_by_guid.items(): project_json.update({ 'variantTagTypes': sorted( @@ -166,17 +165,18 @@ def add_project_tag_types(projects_by_guid, add_counts=False): ), 'variantFunctionalTagTypes': VariantFunctionalData.FUNCTIONAL_DATA_TAG_TYPES, }) - if add_counts: - family_counts.update(_add_tag_type_counts(project_guid, project_json['variantTagTypes'])) - return family_counts +def add_project_tag_type_counts(project, response_json, 
project_json=None): + response_json['projectsByGuid'] = {project.guid: project_json or {}} + add_project_tag_types(response_json['projectsByGuid'], project=project) -def _add_tag_type_counts(project_guid, project_variant_tags): - project_tags = VariantTag.objects.filter(saved_variants__family__project__guid=project_guid) - project_notes = VariantNote.objects.filter(saved_variants__family__project__guid=project_guid) + saved_variants = SavedVariant.objects.filter(family__project=project) + project_tags = VariantTag.objects.filter(saved_variants__in=saved_variants) + project_notes = VariantNote.saved_variants.through.objects.filter(savedvariant_id__in=saved_variants) family_tag_type_counts = defaultdict(dict) + note_tag_type = { 'variantTagTypeGuid': 'notes', 'name': 'Has Notes', @@ -184,24 +184,27 @@ def _add_tag_type_counts(project_guid, project_variant_tags): 'description': '', 'color': 'grey', 'order': 100, - 'numTags': project_notes.aggregate(count=Count('saved_variants__guid', distinct=True))['count'], + 'numTags': project_notes.values_list('savedvariant_id').distinct().count(), } - mme_counts_by_family = project_tags.filter(saved_variants__matchmakersubmissiongenes__isnull=False) \ - .values('saved_variants__family__guid').annotate(count=Count('saved_variants__guid', distinct=True)) + mme_counts_by_family = saved_variants.filter(matchmakersubmissiongenes__isnull=False) \ + .values(family_guid=F('family__guid')).annotate(count=Count('guid', distinct=True)) + + tag_counts_by_type_and_family = defaultdict(list) + for counts in project_tags.values( + 'variant_tag_type__name', family_guid=F('saved_variants__family__guid')).annotate(count=Count('guid', distinct=True)): + tag_counts_by_type_and_family[counts['variant_tag_type__name']].append(counts) + tag_counts_by_type_and_family[MME_TAG_NAME] = mme_counts_by_family - tag_counts_by_type_and_family = project_tags.values( - 'saved_variants__family__guid', 'variant_tag_type__name').annotate(count=Count('guid', 
distinct=True)) + project_variant_tags = project_json['variantTagTypes'] for tag_type in project_variant_tags: - current_tag_type_counts = mme_counts_by_family if tag_type['name'] == MME_TAG_NAME else [ - counts for counts in tag_counts_by_type_and_family if counts['variant_tag_type__name'] == tag_type['name'] - ] + current_tag_type_counts = tag_counts_by_type_and_family[tag_type['name']] num_tags = sum(count['count'] for count in current_tag_type_counts) tag_type.update({ 'numTags': num_tags, }) for count in current_tag_type_counts: - family_tag_type_counts[count['saved_variants__family__guid']].update({tag_type['name']: count['count']}) + family_tag_type_counts[count['family_guid']].update({tag_type['name']: count['count']}) project_variant_tags.append(note_tag_type) - return family_tag_type_counts + response_json['familyTagTypeCounts'] = family_tag_type_counts From 9d030fb419679e8eda5fccfed71e708443c52a50 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 17 May 2024 14:37:37 -0400 Subject: [PATCH 143/736] more efficient sample query --- seqr/views/apis/project_api.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 08ff2b93d9..8acfd533ab 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -227,11 +227,9 @@ def project_overview(request, project_guid): sample_models = Sample.objects.filter(individual__family__project=project) - active_samples = sample_models.filter(is_active=True) - first_loaded_samples = sample_models.order_by('individual__family', 'loaded_date').distinct('individual__family') - samples_by_guid = {} - for samples in [active_samples, first_loaded_samples]: - samples_by_guid.update({s['sampleGuid']: s for s in get_json_for_samples(samples, project_guid=project_guid)}) + first_loaded_samples = sample_models.order_by('individual__family', 'loaded_date').distinct('individual__family').values_list('id', flat=True) + samples = 
sample_models.filter(Q(is_active=True) | Q(id__in=first_loaded_samples)) + samples_by_guid = {s['sampleGuid']: s for s in get_json_for_samples(samples, project_guid=project_guid)} sample_load_counts = sample_models.values( 'sample_type', 'dataset_type', loadedDate=TruncDate('loaded_date'), From a75533cb6414acedd31b4f0c29027a6e93585696 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 17 May 2024 15:15:27 -0400 Subject: [PATCH 144/736] fix default val --- seqr/views/utils/project_context_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/seqr/views/utils/project_context_utils.py b/seqr/views/utils/project_context_utils.py index c737ea138e..d176cea1a2 100644 --- a/seqr/views/utils/project_context_utils.py +++ b/seqr/views/utils/project_context_utils.py @@ -168,7 +168,8 @@ def add_project_tag_types(projects_by_guid, project=None): def add_project_tag_type_counts(project, response_json, project_json=None): - response_json['projectsByGuid'] = {project.guid: project_json or {}} + project_json = project_json or {} + response_json['projectsByGuid'] = {project.guid: project_json} add_project_tag_types(response_json['projectsByGuid'], project=project) saved_variants = SavedVariant.objects.filter(family__project=project) From 59d434d3a09c13f1758d6951b815a00c9f8bbd89 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 17 May 2024 16:13:52 -0400 Subject: [PATCH 145/736] do not allow mito 37 variant lookup --- seqr/utils/search/hail_search_utils.py | 3 +++ seqr/utils/search/hail_search_utils_tests.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 0ec504d1fa..2977ef697d 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -87,6 +87,9 @@ def _execute_lookup(variant_id, data_type, user, **kwargs): def hail_variant_lookup(user, variant_id, **kwargs): + if kwargs.get('genome_version') == 'GRCh37' and 
variant_id[0] == 'M': + from seqr.utils.search.utils import InvalidSearchException + raise InvalidSearchException('Mitochondrial variants are not supported in GRCh37') variant, _ = _execute_lookup(variant_id, Sample.DATASET_TYPE_VARIANT_CALLS, user, **kwargs) return variant diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 8f0cb092cb..512ba7fa4a 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -261,6 +261,12 @@ def test_variant_lookup(self): 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh38', 'data_type': 'SNV_INDEL', }) + # Test mitochondrial variant lookup + responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=400) + with self.assertRaises(InvalidSearchException) as cm: + variant_lookup(self.user, ('M', 11018, 'G', 'T'), genome_version='37') + self.assertEqual(str(cm.exception), 'Mitochondrial variants are not supported in GRCh37') + @responses.activate def test_sv_variant_lookup(self): sv_families = Family.objects.filter(id__in=[2, 14]) From 0ef4dd41aca8035e61ee47ef2f2570f507046b9c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 20 May 2024 13:02:00 -0400 Subject: [PATCH 146/736] do not call mv if no new sample data loaded --- seqr/views/apis/data_manager_api.py | 3 ++- seqr/views/apis/data_manager_api_tests.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 56ee2b285f..1de0c6c452 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -294,7 +294,8 @@ def _save_sample_data(sample_guid, sample_data): except ValueError as e: return create_json_response({'error': str(e)}, status=400) - mv_file_to_gs(f'{file_dir}/*', f'{TEMP_GS_BUCKET}/{file_name_prefix}', request.user) + if sample_guids: + mv_file_to_gs(f'{file_dir}/*', f'{TEMP_GS_BUCKET}/{file_name_prefix}', request.user) 
return create_json_response({ 'info': info, diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 81e9607b31..fa1c7f03b0 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -956,6 +956,7 @@ def _set_file_iter_stdout(rows): # Test already loaded data mock_send_slack.reset_mock() + mock_subprocess.reset_mock() self.reset_logs() _set_file_iter_stdout([header, loaded_data_row]) body['file'] = 'gs://rna_data/muscle_samples.tsv.gz' @@ -970,6 +971,11 @@ def _set_file_iter_stdout(rows): self._has_expected_file_loading_logs('gs://rna_data/muscle_samples.tsv.gz', info=info, warnings=warnings, user=self.pm_user) self.assertEqual(model_cls.objects.count(), params['initial_model_count']) mock_send_slack.assert_not_called() + self.assertEqual(mock_subprocess.call_count, 2) + mock_subprocess.assert_has_calls([mock.call(command, stdout=-1, stderr=-2, shell=True) for command in [ # nosec + f'gsutil ls {body["file"]}', + f'gsutil cat {body["file"]} | gunzip -c -q - ', + ]]) def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_sample_individual_id, body, project_names, num_created_samples=1, warnings=None, additional_logs=None): From 0d287582a27a743d15b0e313632f2d9843b28096 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 20 May 2024 15:37:44 -0400 Subject: [PATCH 147/736] ignore super panels --- panelapp/panelapp_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/panelapp/panelapp_utils.py b/panelapp/panelapp_utils.py index d9540e777e..edad4c9ec5 100644 --- a/panelapp/panelapp_utils.py +++ b/panelapp/panelapp_utils.py @@ -40,6 +40,8 @@ def _extract_ensembl_id_from_json(raw_gene_json): panel_genes_url = '{}/panels/{}/genes'.format(panel_app_api_url, panel_app_id) pa_locus_list = _create_or_update_locus_list_from_panel(user, panel_genes_url, panel, label) all_genes_for_panel = genes_by_panel_id.get(panel_app_id, []) + if not 
all_genes_for_panel: + continue # Genes in 'super panels' are associated with sub panels panel_genes_by_id = {_extract_ensembl_id_from_json(gene): gene for gene in all_genes_for_panel if _extract_ensembl_id_from_json(gene)} raw_ensbl_38_gene_ids_csv = ','.join(panel_genes_by_id.keys()) From d5a0ba98a391564f2e1dd0d58d10eda905d9a514 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 20 May 2024 16:09:10 -0400 Subject: [PATCH 148/736] fix file creation --- seqr/views/apis/data_manager_api.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 1de0c6c452..5e0054968e 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -278,6 +278,7 @@ def update_rna_seq(request): file_name_prefix = f'rna_sample_data__{data_type}__{datetime.now().isoformat()}' file_dir = os.path.join(get_temp_upload_directory(), file_name_prefix) + os.mkdir(file_dir) sample_files = {} @@ -294,6 +295,10 @@ def _save_sample_data(sample_guid, sample_data): except ValueError as e: return create_json_response({'error': str(e)}, status=400) + for f in sample_files.values(): + # Required to ensure gzipped files are properly encoded/ terminated + f.close() + if sample_guids: mv_file_to_gs(f'{file_dir}/*', f'{TEMP_GS_BUCKET}/{file_name_prefix}', request.user) From 9b99da3831b2c78c43756c3e370f315369449073 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 20 May 2024 16:11:24 -0400 Subject: [PATCH 149/736] rename function --- seqr/views/utils/dataset_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 7e2ffd7d17..4fa33c2048 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -335,7 +335,7 @@ def _validate_rna_header(header, column_map): def _load_rna_seq_file( - file_path, user, potential_loaded_samples, update_sample_models, save_sample_data, 
get_matched_sample, + file_path, user, potential_loaded_samples, update_sample_models, save_sample_data, match_sample, column_map, mapping_file=None, allow_missing_gene=False, ignore_extra_samples=False, ): @@ -382,7 +382,7 @@ def _load_rna_seq_file( if any(row_gene_ids): gene_ids.update(row_gene_ids) - get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id_mapping) + match_sample(sample_key, unmatched_samples, sample_id_to_individual_id_mapping) if missing_required_fields or (unmatched_samples and not ignore_extra_samples) or (sample_key in unmatched_samples): # If there are definite errors, do not process/save data, just continue to check for additional errors @@ -485,7 +485,7 @@ def save_sample_data(sample_key, sample_data): save_data(sample_key, sample_data) - def get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id_mapping): + def match_sample(sample_key, unmatched_samples, sample_id_to_individual_id_mapping): if sample_key in potential_samples: sample = potential_samples[sample_key] sample_guid = sample['guid'] @@ -503,7 +503,7 @@ def get_matched_sample(sample_key, unmatched_samples, sample_id_to_individual_id unmatched_samples.add(sample_key) warnings, not_loaded_count = _load_rna_seq_file( - file_path, user, potential_loaded_samples, update_sample_models, save_sample_data, get_matched_sample, + file_path, user, potential_loaded_samples, update_sample_models, save_sample_data, match_sample, *args, **kwargs) message = f'Parsed {len(sample_guid_keys_to_load) + not_loaded_count} RNA-seq samples' info = [message] From 0e276daad8575e24364793b1cccb3434de8a3cd6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 20 May 2024 16:24:48 -0400 Subject: [PATCH 150/736] fix tests --- seqr/views/apis/data_manager_api_tests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 5f38fe3bed..ddae103c5b 100644 --- 
a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -882,12 +882,13 @@ def test_update_rna_splice_outlier(self, *args, **kwargs): @mock.patch('seqr.views.apis.data_manager_api.get_temp_upload_directory', lambda: 'tmp/') @mock.patch('seqr.views.utils.dataset_utils.safe_post_to_slack') @mock.patch('seqr.views.apis.data_manager_api.datetime') + @mock.patch('seqr.views.apis.data_manager_api.os.mkdir') @mock.patch('seqr.views.apis.data_manager_api.os.rename') @mock.patch('seqr.views.apis.data_manager_api.load_uploaded_file') @mock.patch('seqr.utils.file_utils.subprocess.Popen') @mock.patch('seqr.views.apis.data_manager_api.gzip.open') def _test_update_rna_seq(self, data_type, mock_open, mock_subprocess, mock_load_uploaded_file, - mock_rename, mock_datetime, mock_send_slack): + mock_rename, mock_mkdir, mock_datetime, mock_send_slack): url = reverse(update_rna_seq) self.check_pm_login(url) @@ -1058,6 +1059,7 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s f'gsutil cat {RNA_FILE_ID} | gunzip -c -q - ', f'gsutil mv tmp/{file_path}/* gs://seqr-scratch-temp/{file_path}', ]]) + mock_mkdir.assert_called_with(f'tmp/{file_path}') filename = f'tmp/{file_path}/{new_sample_guid}.json.gz' expected_files = { f'tmp/{file_path}/{new_sample_guid if sample_guid == PLACEHOLDER_GUID else sample_guid}.json.gz': data From 82cd9dc991ab11c2b36212f1f473ccd27f010698 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 20 May 2024 16:59:56 -0400 Subject: [PATCH 151/736] search backend specific logic for gene breakdown --- seqr/utils/search/search_utils_tests.py | 25 ++++++++++++++++++------- seqr/utils/search/utils.py | 14 +++++++++----- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index cf75e4af10..968633b412 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -427,13 
+427,6 @@ def test_cached_get_variant_query_gene_counts(self): gene_counts = get_variant_query_gene_counts(self.results_model, self.user) self.assertDictEqual(gene_counts, cached_gene_counts) - self.set_cache({'all_results': PARSED_COMPOUND_HET_VARIANTS_MULTI_PROJECT, 'total_results': 2}) - gene_counts = get_variant_query_gene_counts(self.results_model, self.user) - self.assertDictEqual(gene_counts, { - 'ENSG00000135953': {'total': 1, 'families': {'F000003_3': 1, 'F000011_11': 1}}, - 'ENSG00000228198': {'total': 1, 'families': {'F000003_3': 1, 'F000011_11': 1}} - }) - @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') class ElasticsearchSearchUtilsTests(TestCase, SearchUtilsTests): @@ -491,6 +484,13 @@ def test_get_variant_query_gene_counts(self, mock_get_variants): def test_cached_get_variant_query_gene_counts(self): super(ElasticsearchSearchUtilsTests, self).test_cached_get_variant_query_gene_counts() + self.set_cache({'all_results': PARSED_COMPOUND_HET_VARIANTS_MULTI_PROJECT, 'total_results': 2}) + gene_counts = get_variant_query_gene_counts(self.results_model, self.user) + self.assertDictEqual(gene_counts, { + 'ENSG00000135953': {'total': 1, 'families': {'F000003_3': 1, 'F000011_11': 1}}, + 'ENSG00000228198': {'total': 1, 'families': {'F000003_3': 1, 'F000011_11': 1}}, + }) + self.set_cache({ 'grouped_results': [ {'null': [PARSED_VARIANTS[0]]}, {'ENSG00000228198': PARSED_COMPOUND_HET_VARIANTS_MULTI_PROJECT}, @@ -533,3 +533,14 @@ def test_query_variants(self, mock_call): @mock.patch('seqr.utils.search.utils.get_hail_variants') def test_get_variant_query_gene_counts(self, mock_call): super(HailSearchUtilsTests, self).test_get_variant_query_gene_counts(mock_call) + + def test_cached_get_variant_query_gene_counts(self): + super(HailSearchUtilsTests, self).test_cached_get_variant_query_gene_counts() + + self.set_cache({'all_results': PARSED_COMPOUND_HET_VARIANTS_MULTI_PROJECT + [SV_VARIANT1], 'total_results': 3}) + 
gene_counts = get_variant_query_gene_counts(self.results_model, self.user) + self.assertDictEqual(gene_counts, { + 'ENSG00000135953': {'total': 2, 'families': {'F000003_3': 2, 'F000011_11': 2}}, + 'ENSG00000228198': {'total': 2, 'families': {'F000003_3': 2, 'F000011_11': 2}}, + 'ENSG00000171621': {'total': 1, 'families': {'F000011_11': 1}}, + }) diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index a114e2cfe2..6c5875af49 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -300,11 +300,15 @@ def get_variant_query_gene_counts(search_model, user): def _get_gene_aggs_for_cached_variants(previous_search_results): gene_aggs = defaultdict(lambda: {'total': 0, 'families': defaultdict(int)}) for var in previous_search_results['all_results']: - gene_id = next(( - gene_id for gene_id, transcripts in var['transcripts'].items() - if any(t['transcriptId'] == var['mainTranscriptId'] for t in transcripts) - ), None) if var['mainTranscriptId'] else None - if gene_id: + # ES only reports breakdown for main transcript gene only, hail backend reports for all genes + gene_ids = backend_specific_call( + lambda variant_transcripts: next(( + [gene_id] for gene_id, transcripts in variant_transcripts.items() + if any(t['transcriptId'] == var['mainTranscriptId'] for t in transcripts) + ), []) if var['mainTranscriptId'] else [], + lambda variant_transcripts: variant_transcripts.keys(), + )(var['transcripts']) + for gene_id in gene_ids: gene_aggs[gene_id]['total'] += 1 for family_guid in var['familyGuids']: gene_aggs[gene_id]['families'][family_guid] += 1 From c636c5cd9c56ffc4e1232b0c36e18aa170d47619 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 05:47:00 +0000 Subject: [PATCH 152/736] --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements-dev.txt | 8 +++++--- requirements.txt | 5 +---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 7669e6fe78..faa3c538b3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -39,8 +39,10 @@ idna==3.7 # requests mock==5.0.1 # via -r requirements-dev.in -packaging==23.0 - # via build +packaging==24.0 + # via + # -c requirements.txt + # build pip-tools==6.12.2 # via -r requirements-dev.in pyproject-hooks==1.0.0 @@ -51,7 +53,7 @@ pytz==2022.7.1 # django rcssmin==1.1.1 # via django-compressor -requests==2.31.0 +requests==2.32.0 # via # -c requirements.txt # responses diff --git a/requirements.txt b/requirements.txt index 65c121d18a..916ad0153b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -131,7 +131,7 @@ pytz==2022.7.1 # django-notifications-hq redis==4.5.4 # via -r requirements.in -requests==2.31.0 +requests==2.32.0 # via # -r requirements.in # django-anymail @@ -179,6 +179,3 @@ urllib3==1.26.18 # requests whitenoise==6.3.0 # via -r requirements.in - -# The following packages are considered to be unsafe in a requirements file: -# setuptools From e67848b6bd19085547723e2b45ab3628f975467e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 21 May 2024 11:45:14 -0400 Subject: [PATCH 153/736] improve mgration performance --- seqr/migrations/0064_alter_phenotypeprioritization.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/seqr/migrations/0064_alter_phenotypeprioritization.py b/seqr/migrations/0064_alter_phenotypeprioritization.py index b3538361e9..c005925ce4 100644 --- a/seqr/migrations/0064_alter_phenotypeprioritization.py +++ b/seqr/migrations/0064_alter_phenotypeprioritization.py @@ -19,10 +19,11 @@ def update_guids(apps, schema_editor): PhenotypePrioritization = apps.get_model('seqr', 'PhenotypePrioritization') db_alias = schema_editor.connection.alias pps = PhenotypePrioritization.objects.using(db_alias).all() + 
individual_id_map = dict(pps.values_list('id', 'individual__individual_id')) for pp in pps: - ids_as_str = "%s:%s:%s" % (pp.individual.individual_id, pp.gene_id, pp.disease_id) + ids_as_str = "%s:%s:%s" % (individual_id_map[pp.id], pp.gene_id, pp.disease_id) pp.guid = 'PP%07d_%s' % (pp.id, _slugify(str(ids_as_str)))[:MAX_GUID_SIZE] - PhenotypePrioritization.objects.using(db_alias).bulk_update(pps, ['guid']) + PhenotypePrioritization.objects.using(db_alias).bulk_update(pps, ['guid'], batch_size=1000) operations = [ migrations.AddField( From 772f4e3a6e87a4b3440ff2e7ed0eb323a3f49cfe Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 21 May 2024 12:05:14 -0400 Subject: [PATCH 154/736] some changes --- seqr/utils/search/hail_search_utils.py | 15 ++++++--------- seqr/utils/search/hail_search_utils_tests.py | 2 +- seqr/utils/search/search_utils_tests.py | 4 ++-- seqr/utils/search/utils.py | 19 ++++++++++++++----- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 2977ef697d..945d0d02a2 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -77,7 +77,7 @@ def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_id return response_json['results'] -def _execute_lookup(variant_id, data_type, user, **kwargs): +def _execute_lookup(user, variant_id, data_type, **kwargs): body = { 'variant_id': variant_id, 'data_type': data_type, @@ -86,22 +86,19 @@ def _execute_lookup(variant_id, data_type, user, **kwargs): return _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}), body -def hail_variant_lookup(user, variant_id, **kwargs): - if kwargs.get('genome_version') == 'GRCh37' and variant_id[0] == 'M': - from seqr.utils.search.utils import InvalidSearchException - raise InvalidSearchException('Mitochondrial variants are not supported in GRCh37') - variant, _ = 
_execute_lookup(variant_id, Sample.DATASET_TYPE_VARIANT_CALLS, user, **kwargs) +def hail_variant_lookup(user, variant_id, dataset_type, **kwargs): + variant, _ = _execute_lookup(user, variant_id, data_type=dataset_type, **kwargs) return variant -def hail_sv_variant_lookup(user, variant_id, samples, sample_type=None, **kwargs): +def hail_sv_variant_lookup(user, variant_id, dataset_type, samples, sample_type=None, **kwargs): if not sample_type: from seqr.utils.search.utils import InvalidSearchException raise InvalidSearchException('Sample type must be specified to look up a structural variant') - data_type = f'{Sample.DATASET_TYPE_SV_CALLS}_{sample_type}' + data_type = f'{dataset_type}_{sample_type}' sample_data = _get_sample_data(samples) - variant, body = _execute_lookup(variant_id, data_type, user, sample_data=sample_data.pop(data_type), **kwargs) + variant, body = _execute_lookup(user, variant_id, data_type, sample_data=sample_data.pop(data_type), **kwargs) variants = [variant] if variant['svType'] in {'DEL', 'DUP'}: diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 512ba7fa4a..40029b363d 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -265,7 +265,7 @@ def test_variant_lookup(self): responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=400) with self.assertRaises(InvalidSearchException) as cm: variant_lookup(self.user, ('M', 11018, 'G', 'T'), genome_version='37') - self.assertEqual(str(cm.exception), 'Mitochondrial variants are not supported in GRCh37') + self.assertEqual(str(cm.exception), 'Only SNV_INDEL variants are available for GRCh37') @responses.activate def test_sv_variant_lookup(self): diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index cf75e4af10..c985afb58c 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -56,7 +56,7 
@@ def test_variant_lookup(self, mock_variant_lookup): mock_variant_lookup.return_value = VARIANT_LOOKUP_VARIANT variant = variant_lookup(self.user, ('1', 10439, 'AC', 'A'), genome_version='38') self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT) - mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh38') + mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), 'SNV_INDEL', genome_version='GRCh38') cache_key = "variant_lookup_results__('1', 10439, 'AC', 'A')__38__" self.assert_cached_results(variant, cache_key=cache_key) @@ -73,7 +73,7 @@ def test_sv_variant_lookup(self, mock_sv_variant_lookup): variants = sv_variant_lookup(self.user, 'phase2_DEL_chr14_4640', self.families, genome_version='38', sample_type='WGS') self.assertListEqual(variants, [SV_VARIANT4, SV_VARIANT1]) mock_sv_variant_lookup.assert_called_with( - self.user, 'phase2_DEL_chr14_4640', genome_version='GRCh38', samples=mock.ANY, sample_type='WGS') + self.user, 'phase2_DEL_chr14_4640', 'SV', genome_version='GRCh38', samples=mock.ANY, sample_type='WGS') cache_key = 'variant_lookup_results__phase2_DEL_chr14_4640__38__test_user' self.assert_cached_results(variants, cache_key=cache_key) expected_samples = {s for s in self.search_samples if s.guid in SV_SAMPLES} diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index a114e2cfe2..61c5abdc91 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -2,7 +2,7 @@ from copy import deepcopy from datetime import timedelta -from reference_data.models import GENOME_VERSION_LOOKUP, GENOME_VERSION_GRCh38 +from reference_data.models import GENOME_VERSION_LOOKUP, GENOME_VERSION_GRCh38, GENOME_VERSION_GRCh37 from seqr.models import Sample, Individual, Project from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json from seqr.utils.search.constants import XPOS_SORT_KEY, PRIORITIZED_GENE_SORT, RECESSIVE, COMPOUND_HET, \ @@ -149,27 +149,36 @@ def 
_get_variants_for_variant_ids(families, variant_ids, user, user_email=None, ) -def _variant_lookup(lookup_func, user, variant_id, genome_version=None, cache_key_suffix='', **kwargs): +def _variant_lookup(lookup_func, user, variant_id, dataset_type, genome_version=None, cache_key_suffix='', **kwargs): genome_version = genome_version or GENOME_VERSION_GRCh38 + _validate_dataset_type_genome_version(dataset_type, genome_version) cache_key = f'variant_lookup_results__{variant_id}__{genome_version}__{cache_key_suffix}' variant = safe_redis_get_json(cache_key) if variant: return variant lookup_func = backend_specific_call(_raise_search_error('Hail backend is disabled'), lookup_func) - variant = lookup_func(user, variant_id, genome_version=GENOME_VERSION_LOOKUP[genome_version], **kwargs) + variant = lookup_func(user, variant_id, dataset_type, genome_version=GENOME_VERSION_LOOKUP[genome_version], **kwargs) safe_redis_set_json(cache_key, variant, expire=timedelta(weeks=2)) return variant -def variant_lookup(*args, **kwargs): - return _variant_lookup(hail_variant_lookup, *args, **kwargs) +def _validate_dataset_type_genome_version(dataset_type, genome_version): + if genome_version == GENOME_VERSION_GRCh37 and dataset_type != Sample.DATASET_TYPE_VARIANT_CALLS: + raise InvalidSearchException('Only SNV_INDEL variants are available for GRCh37') + + +def variant_lookup(user, parsed_variant_id, **kwargs): + dt = _variant_ids_dataset_type([parsed_variant_id]) + dataset_type = DATASET_TYPES_LOOKUP[dt][0] + return _variant_lookup(hail_variant_lookup, user, parsed_variant_id, **kwargs, dataset_type=dataset_type) def sv_variant_lookup(user, variant_id, families, **kwargs): samples, _ = _get_families_search_data(families, dataset_type=Sample.DATASET_TYPE_SV_CALLS) return _variant_lookup( hail_sv_variant_lookup, user, variant_id, **kwargs, samples=samples, cache_key_suffix=user, + dataset_type=Sample.DATASET_TYPE_SV_CALLS, ) From e015cf72d4c964c827d1ed28f08f3744b35556cc Mon Sep 17 00:00:00 
2001 From: Julia Klugherz Date: Tue, 21 May 2024 12:35:08 -0400 Subject: [PATCH 155/736] small things --- seqr/utils/search/hail_search_utils_tests.py | 2 +- seqr/utils/search/utils.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 40029b363d..65b25977cc 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -265,7 +265,7 @@ def test_variant_lookup(self): responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=400) with self.assertRaises(InvalidSearchException) as cm: variant_lookup(self.user, ('M', 11018, 'G', 'T'), genome_version='37') - self.assertEqual(str(cm.exception), 'Only SNV_INDEL variants are available for GRCh37') + self.assertEqual(str(cm.exception), 'MITO variants are not available for GRCh37') @responses.activate def test_sv_variant_lookup(self): diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index 61c5abdc91..7289b1a560 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -165,12 +165,11 @@ def _variant_lookup(lookup_func, user, variant_id, dataset_type, genome_version= def _validate_dataset_type_genome_version(dataset_type, genome_version): if genome_version == GENOME_VERSION_GRCh37 and dataset_type != Sample.DATASET_TYPE_VARIANT_CALLS: - raise InvalidSearchException('Only SNV_INDEL variants are available for GRCh37') + raise InvalidSearchException(f'{dataset_type} variants are not available for GRCh37') def variant_lookup(user, parsed_variant_id, **kwargs): - dt = _variant_ids_dataset_type([parsed_variant_id]) - dataset_type = DATASET_TYPES_LOOKUP[dt][0] + dataset_type = DATASET_TYPES_LOOKUP[_variant_ids_dataset_type([parsed_variant_id])][0] return _variant_lookup(hail_variant_lookup, user, parsed_variant_id, **kwargs, dataset_type=dataset_type) From 1a2180d8e0d9f999528b0db92f7ab93076a05b91 Mon Sep 17 00:00:00 2001 
From: Hana Snow Date: Tue, 21 May 2024 15:46:38 -0400 Subject: [PATCH 156/736] update multi option conditional column validation --- seqr/views/apis/report_api.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index b308c6434a..fd04906ff9 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -695,12 +695,16 @@ def _load_data_model_validators(): return table_configs, required_tables +def _get_multi_conditional_validator(validator): + match = re.match(r'CONDITIONAL \(([^\)]+)\)', validator) + return match and match.group(1).split(', ') + + def _parse_table_required(required_validator): if required_validator is True: return True - match = re.match(r'CONDITIONAL \(([\w+(\s,)?]+)\)', required_validator) - return match and match.group(1).split(', ') + return _get_multi_conditional_validator(required_validator) def _has_required_table(table, validator, tables): @@ -718,15 +722,12 @@ def _is_required_col(required_validator, row): if required_validator is True: return True - match = re.match(r'CONDITIONAL \(([\w+(\s)?]+) = ([\w+(\s)?]+)\)', required_validator) - if not match: + condition_validators = _get_multi_conditional_validator(required_validator) + if not condition_validators: return True - field, value = match.groups() - return row[field] == value - - - + conditions = [re.match(r'([^\s]+) = ([^\s]+)', c).groups() for c in condition_validators] + return any(row[field] == value for field, value in conditions) def _validate_column_data(column, file_name, data, column_validator, warnings, errors): From 11bce25994813ce75ec66588be6919cdc01a63e3 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 21 May 2024 15:53:02 -0400 Subject: [PATCH 157/736] add link to upload format document for rna-seq --- ui/pages/DataManagement/components/RnaSeq.jsx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git 
a/ui/pages/DataManagement/components/RnaSeq.jsx b/ui/pages/DataManagement/components/RnaSeq.jsx index 345e01da33..acd5f1c879 100644 --- a/ui/pages/DataManagement/components/RnaSeq.jsx +++ b/ui/pages/DataManagement/components/RnaSeq.jsx @@ -1,3 +1,4 @@ +import React from 'react' import { connect } from 'react-redux' import { validators } from 'shared/components/form/FormHelpers' @@ -8,6 +9,19 @@ import UploadFormPage from 'shared/components/page/UploadFormPage' import { getRnaSeqUploadStats } from '../selectors' import { uploadRnaSeq } from '../reducers' +const uploadLabelHelp = ( +
+ RNA-seq data should be formatted according to  + + these guidelines + + . +
+) const mapStateToProps = state => ({ fields: [ { @@ -15,6 +29,7 @@ const mapStateToProps = state => ({ label: 'RNA-seq data', placeholder: 'gs:// Google bucket path', validate: validators.required, + labelHelp: uploadLabelHelp, }, { name: 'dataType', From c7d78290caa38af42332262460a4537a90c00e01 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 21 May 2024 15:53:21 -0400 Subject: [PATCH 158/736] add new columns --- seqr/views/apis/report_api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index fd04906ff9..6b07e4a54c 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -206,7 +206,7 @@ def _add_row(row, family_id, row_type): 'targeted_region_bed_file', 'date_data_generation', 'target_insert_size', 'sequencing_platform', ] EXPERIMENT_COLUMNS = {'analyte_id', 'experiment_sample_id'} -EXPERIMENT_TABLE_COLUMNS = {'experiment_dna_short_read_id'} +EXPERIMENT_TABLE_COLUMNS = {'experiment_dna_short_read_id', 'sequencing_event_details'} EXPERIMENT_TABLE_COLUMNS.update(EXPERIMENT_COLUMNS) EXPERIMENT_TABLE_COLUMNS.update(EXPERIMENT_TABLE_AIRTABLE_FIELDS) EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS = [ @@ -240,9 +240,10 @@ def _add_row(row, family_id, row_type): } GENETIC_FINDINGS_TABLE_COLUMNS = { 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', 'gene', 'transcript', 'hgvsc', 'hgvsp', - *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', + 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'genetic_findings_id', 'participant_id', 'experiment_id', 'zygosity', 'allele_balance_or_heteroplasmy_percentage', 'variant_inheritance', 'linked_variant', 'additional_family_members_with_variant', 'method_of_discovery', + 'gene_disease_validity', } RNA_ONLY = EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS + READ_RNA_TABLE_AIRTABLE_FIELDS + [ From 6cca4b37095ab2b8a3e9f84687d04b6760533b4b Mon 
Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 21 May 2024 16:07:23 -0400 Subject: [PATCH 159/736] rename gene_of_interest column --- seqr/views/apis/report_api.py | 9 +++++---- seqr/views/utils/anvil_metadata_utils.py | 7 ++++--- ui/shared/utils/constants.js | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 6b07e4a54c..7316cd48b0 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -14,7 +14,7 @@ from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, \ FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE, PARTICIPANT_TABLE, PHENOTYPE_TABLE, \ - EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS + EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, GENE_COLUMN from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.permissions_utils import analyst_required, get_project_and_check_permissions, \ @@ -123,9 +123,10 @@ def _add_row(row, family_id, row_type): [f'Discovery variant(s) {", ".join(missing_gene_rows)} in family {family_id} have no associated gene']) parsed_rows[row_type] += [{ 'entity:discovery_id': f'{discovery_row["chrom"]}_{discovery_row["pos"]}_{discovery_row["participant_id"]}', - **{k: str(discovery_row.get(k.lower()) or '') for k in ['Gene', 'Zygosity', 'Chrom', 'Pos', 'Ref', 'Alt', 'Transcript']}, + **{k: str(discovery_row.get(k.lower()) or '') for k in ['Zygosity', 'Chrom', 'Pos', 'Ref', 'Alt', 'Transcript']}, **{k: discovery_row[field] for k, field in { 'subject_id': 'participant_id', + 'Gene': GENE_COLUMN, 'Gene_Class': 'gene_known_for_phenotype', 'inheritance_description': 'variant_inheritance', 'variant_genome_build': 'variant_reference_assembly', @@ 
-239,7 +240,7 @@ def _add_row(row, family_id, row_type): 'caller_software', 'variant_types', 'analysis_details', } GENETIC_FINDINGS_TABLE_COLUMNS = { - 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', 'gene', 'transcript', 'hgvsc', 'hgvsp', + 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp', 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'genetic_findings_id', 'participant_id', 'experiment_id', 'zygosity', 'allele_balance_or_heteroplasmy_percentage', 'variant_inheritance', 'linked_variant', 'additional_family_members_with_variant', 'method_of_discovery', @@ -813,7 +814,7 @@ def _add_row(row, family_id, row_type): family = families_by_id[family_id] if 'inheritance_models' not in family: family.update({'genes': set(), 'inheritance_models': set()}) - family['genes'].update({v.get('gene') or v.get('sv_name') or v.get('gene_id') or '' for v in row}) + family['genes'].update({v.get(GENE_COLUMN) or v.get('sv_name') or v.get('gene_id') or '' for v in row}) family['inheritance_models'].update({v['variant_inheritance'] for v in row}) parse_anvil_metadata( diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index b88762aaed..d422bf1618 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -28,6 +28,7 @@ 'gene_known_for_phenotype', 'known_condition_name', 'condition_id', 'condition_inheritance', 'GREGoR_variant_classification', 'notes', ] +GENE_COLUMN = 'gene_of_interest' HISPANIC = 'AMR' OTHER = 'OTH' @@ -345,7 +346,7 @@ def _get_parsed_saved_discovery_variants_by_family( saved_variants_by_family = defaultdict(list) for row in variants: - row['gene'] = genes_by_id.get(row['gene_id'], {}).get('geneSymbol') + row[GENE_COLUMN] = genes_by_id.get(row['gene_id'], {}).get('geneSymbol') family_id = row.pop('family_id') 
saved_variants_by_family[family_id].append(row) @@ -462,13 +463,13 @@ def _get_genetic_findings_rows(rows: list[dict], individual: Individual, partici if sample is not None: parsed_row['sample_id'] = sample.sample_id parsed_rows.append(parsed_row) - variants_by_gene[row['gene']].append({**parsed_row, 'individual_genotype': individual_genotype}) + variants_by_gene[row[GENE_COLUMN]].append({**parsed_row, 'individual_genotype': individual_genotype}) to_remove = [] for row in parsed_rows: del row['genotypes'] process_func = post_process_variant or _post_process_variant_metadata - update = process_func(row, variants_by_gene[row['gene']], include_parent_mnvs=include_parent_mnvs) + update = process_func(row, variants_by_gene[row[GENE_COLUMN]], include_parent_mnvs=include_parent_mnvs) if update: row.update(update) else: diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 992f1aef7d..55346bc3aa 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1854,7 +1854,7 @@ export const VARIANT_METADATA_COLUMNS = [ { name: 'pos' }, { name: 'ref' }, { name: 'alt' }, - { name: 'gene' }, + { name: 'gene_of_interest' }, { name: 'seqr_chosen_consequence' }, { name: 'transcript' }, { name: 'hgvsc' }, From cc4cba1cb6dda0b10326d0e11208d07f11eadf00 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 21 May 2024 16:17:36 -0400 Subject: [PATCH 160/736] include version in transcript id --- seqr/views/utils/anvil_metadata_utils.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index d422bf1618..ab592fbc23 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -100,10 +100,21 @@ Sample.SAMPLE_TYPE_WGS: 'SR-GS', } + +def _format_hgvs(hgvs, *args): + return (hgvs or '').split(':')[-1] + + +def _format_transcript_id(transcript_id, transcript): + if transcript_id and 
(transcript.get('hgvsc') or '').startswith(transcript_id): + return transcript['hgvsc'].split(':')[0] + return transcript_id + + TRANSCRIPT_FIELDS = { - 'transcript': {'seqr_field': 'transcriptId'}, - 'hgvsc': {'format': lambda hgvs: (hgvs or '').split(':')[-1]}, - 'hgvsp': {'format': lambda hgvs: (hgvs or '').split(':')[-1]}, + 'transcript': {'seqr_field': 'transcriptId', 'format': _format_transcript_id}, + 'hgvsc': {'format': _format_hgvs}, + 'hgvsp': {'format': _format_hgvs}, } @@ -374,7 +385,7 @@ def _get_variant_main_transcript(variant_model): def _get_transcript_field(field, config, transcript): value = transcript.get(config.get('seqr_field', field)) if config.get('format'): - value = config['format'](value) + value = config['format'](value, transcript) return value From 3b0e4363dbfc0ed30bcf4a983c6fd922df912df6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 21 May 2024 16:33:17 -0400 Subject: [PATCH 161/736] update tests --- seqr/views/apis/report_api_tests.py | 43 +++++++++++++---------- seqr/views/apis/summary_data_api_tests.py | 6 ++-- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 6e99831378..54a5d05dbb 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -314,6 +314,7 @@ {'column': 'date_data_generation', 'data_type': 'date'}, {'column': 'target_insert_size', 'data_type': 'integer'}, {'column': 'sequencing_platform'}, + {'column': 'sequencing_event_details'}, ], }, { @@ -418,7 +419,7 @@ {'column': 'ref','required': True}, {'column': 'alt', 'required': True}, {'column': 'ClinGen_allele_ID'}, - {'column': 'gene', 'required': True}, + {'column': 'gene_of_interest', 'required': True}, {'column': 'transcript'}, {'column': 'hgvsc'}, {'column': 'hgvsp'}, @@ -435,7 +436,13 @@ {'column': 'partial_contribution_explained'}, {'column': 'additional_family_members_with_variant'}, {'column': 'method_of_discovery', 'data_type': 
'enumeration', 'multi_value_delimiter': '|', 'enumerations': ['SR-ES', 'SR-GS', 'LR-GS', 'SNP array']}, - {'column': 'notes'} + {'column': 'notes'}, + {'column': 'sv_type'}, + {'column': 'chrom_end'}, + {'column': 'pos_end'}, + {'column': 'copy_number'}, + {'column': 'hgvs'}, + {'column': 'gene_disease_validity'}, ] }, ] @@ -558,16 +565,16 @@ [ 'experiment_dna_short_read_id', 'analyte_id', 'experiment_sample_id', 'seq_library_prep_kit_method', 'read_length', 'experiment_type', 'targeted_regions_method', 'targeted_region_bed_file', - 'date_data_generation', 'target_insert_size', 'sequencing_platform', + 'date_data_generation', 'target_insert_size', 'sequencing_platform', 'sequencing_event_details', ], [ 'Broad_exome_VCGS_FAM203_621_D2', 'Broad_SM-JDBTM', 'VCGS_FAM203_621_D2', 'Kapa HyperPrep', '151', 'exome', - 'Twist', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/SR_experiment.bed', '2022-08-15', '385', 'NovaSeq', + 'Twist', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/SR_experiment.bed', '2022-08-15', '385', 'NovaSeq', '', ], [ 'Broad_exome_NA20888', 'Broad_SM-L5QMP', 'NA20888', 'Kapa HyperPrep', '151', 'exome', - 'Twist', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/SR_experiment.bed', '2022-06-05', '380', 'NovaSeq', + 'Twist', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/SR_experiment.bed', '2022-06-05', '380', 'NovaSeq', '', ], [ 'Broad_genome_NA20888_1', 'Broad_SM-L5QMWP', 'NA20888_1', 'Kapa HyperPrep w/o amplification', '200', 'genome', - '', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/SR_experiment.bed', '2023-03-13', '450', 'NovaSeq2', + '', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/SR_experiment.bed', '2023-03-13', '450', 'NovaSeq2', '', ], ] @@ -592,29 +599,29 @@ GENETIC_FINDINGS_TABLE = [ [ 'genetic_findings_id', 'participant_id', 'experiment_id', 'variant_type', 'variant_reference_assembly', - 'chrom', 'pos', 'ref', 'alt', 'ClinGen_allele_ID', 'gene', 'transcript', 'hgvsc', 'hgvsp', 'zygosity', + 'chrom', 'pos', 'ref', 'alt', 
'ClinGen_allele_ID', 'gene_of_interest', 'transcript', 'hgvsc', 'hgvsp', 'zygosity', 'allele_balance_or_heteroplasmy_percentage', 'variant_inheritance', 'linked_variant', 'linked_variant_phase', 'gene_known_for_phenotype', 'known_condition_name', 'condition_id', 'condition_inheritance', 'phenotype_contribution', 'partial_contribution_explained', 'additional_family_members_with_variant', - 'method_of_discovery', 'notes', + 'method_of_discovery', 'notes', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', 'hgvs', 'gene_disease_validity', ], [ 'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'SNV/INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '', - 'RP11', 'ENST00000258436', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Candidate', + 'RP11', 'ENST00000258436.5', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Candidate', 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', 'OMIM:615120', 'Autosomal recessive|X-linked', - 'Full', '', '', 'SR-ES', '', + 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', ], [ 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', '', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', - 'MONDO:0044970', '', 'Full', '', 'Broad_HG00732', 'SR-ES', '', + 'MONDO:0044970', '', 'Full', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', ], [ 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', '', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_249045487', '', 'Candidate', 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', - 'Full', '', '', 'SR-ES', '', + 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', ], [ 'Broad_NA20889_1_249045487', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '249045487', 'A', 'G', '', 'OR4G11P', '', '', '', 'Heterozygous', 
'', 'unknown', 'Broad_NA20889_1_248367227', '', 'Candidate', - 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', 'Full', '', '', 'SR-ES', '', + 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', ], ] @@ -723,7 +730,7 @@ def test_anvil_export(self, mock_google_authenticated, mock_zip): 'Homozygous', 'GRCh37', '1', '248367227', 'TC', 'T', '-', '-', '-', '-', '-', '-', '-'], discovery_file) self.assertIn([ '21_3343353_NA19675_1', 'NA19675_1', 'NA19675', 'RP11', 'Candidate', 'de novo', - 'Heterozygous', 'GRCh37', '21', '3343353', 'GAGA', 'G', 'c.375_377delTCT', 'p.Leu126del', 'ENST00000258436', + 'Heterozygous', 'GRCh37', '21', '3343353', 'GAGA', 'G', 'c.375_377delTCT', 'p.Leu126del', 'ENST00000258436.5', '-', '-', '-', '-'], discovery_file) self.assertIn([ '19_1912633_HG00731', 'HG00731', 'HG00731', 'OR4G11P', 'Known', 'unknown', 'Heterozygous', 'GRCh38', '19', @@ -1202,7 +1209,7 @@ def test_variant_metadata(self): 'displayName': '2', 'familyGuid': 'F000002_2', 'family_id': '2', - 'gene': 'RP11', + 'gene_of_interest': 'RP11', 'gene_id': 'ENSG00000135953', 'gene_known_for_phenotype': 'Known', 'genetic_findings_id': 'HG00731_1_248367227', @@ -1230,7 +1237,7 @@ def test_variant_metadata(self): 'end': 1912634, 'familyGuid': 'F000002_2', 'family_id': '2', - 'gene': 'OR4G11P', + 'gene_of_interest': 'OR4G11P', 'gene_id': 'ENSG00000240361', 'gene_known_for_phenotype': 'Known', 'genetic_findings_id': 'HG00731_19_1912634', @@ -1271,7 +1278,7 @@ def test_variant_metadata(self): 'familyGuid': 'F000012_12', 'family_id': '12', 'family_history': 'Yes', - 'gene': 'OR4G11P', + 'gene_of_interest': 'OR4G11P', 'gene_id': 'ENSG00000240361', 'gene_known_for_phenotype': 'Candidate', 'genetic_findings_id': 'NA20889_1_248367227', @@ -1299,7 +1306,7 @@ def test_variant_metadata(self): 'familyGuid': 'F000012_12', 'family_id': '12', 'family_history': 'Yes', - 'gene': None, + 'gene_of_interest': None, 'gene_id': None, 
'gene_known_for_phenotype': 'Candidate', 'genetic_findings_id': 'NA20889_1_249045487', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 6a4a040932..d6dd32f765 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -70,7 +70,7 @@ "sex": "Female", "chrom-1": "1", "alt-1": "T", - "gene-1": "OR4G11P", + "gene_of_interest-1": "OR4G11P", "gene_id-1": "ENSG00000240361", 'variant_reference_assembly-1': 'GRCh37', 'variant_reference_assembly-2': 'GRCh37', @@ -94,7 +94,7 @@ 'hgvsp-2': '', 'transcript-2': None, 'seqr_chosen_consequence-2': None, - 'gene-2': None, + 'gene_of_interest-2': None, 'gene_id-2': None, 'svName-2': None, 'svType-1': None, @@ -156,7 +156,7 @@ 'zygosity-1': 'Heterozygous', 'variant_reference_assembly-1': 'GRCh38', 'allele_balance_or_heteroplasmy_percentage-1': None, - 'gene-1': None, + 'gene_of_interest-1': None, 'gene_id-1': None, 'hgvsc-1': '', 'hgvsp-1': '', From 6e3aeca3776caf1f75493719857fc96e63a3da99 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 21 May 2024 16:49:09 -0400 Subject: [PATCH 162/736] test multi conditional column --- seqr/views/apis/report_api_tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 54a5d05dbb..f2b7a05316 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -452,7 +452,7 @@ INVALID_MODEL_TABLES = { 'participant': { 'internal_project_id': {'data_type': 'reference'}, - 'prior_testing': {'data_type': 'enumeration'}, + 'prior_testing': {'data_type': 'enumeration', 'required': 'CONDITIONAL (proband_relationship = Self, proband_relationship = Father)'}, 'proband_relationship': {'required': 'CONDITIONAL (sex = Male)'}, 'reported_race': {'enumerations': ['Asian', 'White', 'Black']}, 'age_at_enrollment': {'data_type': 'date'} @@ -830,6 +830,7 @@ def test_gregor_export(self, 
mock_subprocess, mock_temp_dir, mock_open, mock_dat ] + [ 'The following tables are required in the data model but absent from the reports: subject, dna_read_data_set', ] + [ + 'The following entries are missing required "prior_testing" in the "participant" table: Broad_HG00731, Broad_HG00732', 'The following entries are missing required "proband_relationship" in the "participant" table: Broad_NA19678, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881', 'The following entries have invalid values for "reported_race" in the "participant" table. Allowed values: Asian, White, Black. Invalid values: Broad_NA19675_1 (Middle Eastern or North African)', 'The following entries have invalid values for "age_at_enrollment" in the "participant" table. Allowed values have data type date. Invalid values: Broad_NA19675_1 (18)', From 6eccb1eff89067e4cab42d9c479dcc57eeaa821a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 10:35:22 -0400 Subject: [PATCH 163/736] fix individual export download --- ui/pages/SummaryData/components/IndividualMetadata.jsx | 2 +- ui/pages/SummaryData/components/IndividualMetadata.test.js | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ui/pages/SummaryData/components/IndividualMetadata.jsx b/ui/pages/SummaryData/components/IndividualMetadata.jsx index c55db3aac3..00bd8c31e4 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.jsx +++ b/ui/pages/SummaryData/components/IndividualMetadata.jsx @@ -80,7 +80,7 @@ const getColumns = (data) => { ...[...Array(maxSavedVariants).keys()].map(i => VARIANT_METADATA_COLUMNS.map( ({ name, format, fieldName, ...col }) => ({ name: `${name}-${i + 1}`, - secondaryExportColumn: name === 'gene' ? `gene_id-${i + 1}` : null, + secondaryExportColumn: name === 'gene_of_interest' ? `gene_id-${i + 1}` : null, format: format ? 
row => format({ [fieldName]: row[`${fieldName}-${i + 1}`] }) : null, ...col, }), diff --git a/ui/pages/SummaryData/components/IndividualMetadata.test.js b/ui/pages/SummaryData/components/IndividualMetadata.test.js index 8a573d3c00..625bdf05e8 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.test.js +++ b/ui/pages/SummaryData/components/IndividualMetadata.test.js @@ -48,7 +48,7 @@ const DATA = [ sex: 'Female', 'chrom-1': '1', 'alt-1': 'T', - 'gene-1': 'OR4G11P', + 'gene_of_interest-1': 'OR4G11P', 'gene_id-1': 'ENSG00000240361', pmid_id: null, phenotype_description: null, @@ -75,10 +75,10 @@ test('IndividualMetadata render and export', () => { 'condition_id', 'known_condition_name', 'disorders', 'affected_status', 'hpo_present', 'hpo_absent', 'phenotype_description', 'analysis_groups', 'analysis_status', 'solve_status', 'MME', 'data_type', 'date_data_generation', 'filter_flags', 'consanguinity', 'family_history', 'genetic_findings_id-1', 'variant_reference_assembly-1', - 'chrom-1', 'pos-1', 'ref-1', 'alt-1', 'gene-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1', + 'chrom-1', 'pos-1', 'ref-1', 'alt-1', 'gene_of_interest-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1', 'hgvsc-1', 'hgvsp-1', 'zygosity-1', 'sv_name-1', 'sv_type-1', 'variant_inheritance-1', 'gene_known_for_phenotype-1', 'notes-1', 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2', - 'ref-2', 'alt-2', 'gene-2', 'gene_id-2', 'seqr_chosen_consequence-2', 'transcript-2', 'hgvsc-2', 'hgvsp-2', + 'ref-2', 'alt-2', 'gene_of_interest-2', 'gene_id-2', 'seqr_chosen_consequence-2', 'transcript-2', 'hgvsc-2', 'hgvsp-2', 'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', 'notes-2']) expect(exportConfig.processRow(DATA[0])).toEqual([ 'Test Reprocessed Project', 'R0003_test', '12', 'F000012_12', 'NA20889', 'I000017_na20889', null, '', '', '', '', From 6497bdd2e874885de80746d1dd88a5003fa8079e Mon Sep 17 00:00:00 
2001 From: Hana Snow Date: Wed, 22 May 2024 10:39:51 -0400 Subject: [PATCH 164/736] fix gregor import --- seqr/views/apis/individual_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/individual_api.py b/seqr/views/apis/individual_api.py index 159291cb86..7b6d6f0c59 100644 --- a/seqr/views/apis/individual_api.py +++ b/seqr/views/apis/individual_api.py @@ -13,7 +13,7 @@ from seqr.utils.file_utils import file_iter from seqr.utils.gene_utils import get_genes, get_gene_ids_for_gene_symbols from seqr.views.utils.anvil_metadata_utils import PARTICIPANT_TABLE, PHENOTYPE_TABLE, EXPERIMENT_TABLE, \ - EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, TRANSCRIPT_FIELDS, parse_population + EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, TRANSCRIPT_FIELDS, GENE_COLUMN, parse_population from seqr.views.utils.file_utils import save_uploaded_file, load_uploaded_file, parse_file from seqr.views.utils.json_to_orm_utils import update_individual_from_json, update_model_from_json from seqr.views.utils.json_utils import create_json_response, _to_snake_case, _to_camel_case @@ -909,13 +909,13 @@ def import_gregor_metadata(request, project_guid): 'support_vars': [], }) family_variant_data[key] = variant - genes.add(variant['gene']) + genes.add(variant[GENE_COLUMN]) finding_id_map[variant['genetic_findings_id']] = variant_id gene_symbols_to_ids = {k: v[0] for k, v in get_gene_ids_for_gene_symbols(genes).items()} missing_genes = set() for variant in family_variant_data.values(): - gene = variant['gene'] + gene = variant[GENE_COLUMN] transcript = variant.pop('transcript') if gene in gene_symbols_to_ids: variant.update({ From 862f37af123bb1ee86d149d914cdf27591a03e8e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 11:39:19 -0400 Subject: [PATCH 165/736] add post discovery mondo id --- CHANGELOG.md | 1 + .../0066_family_post_discovery_mondo_id.py | 18 ++++++++++++++++++ seqr/models.py | 3 ++- 3 files 
changed, 21 insertions(+), 1 deletion(-) create mode 100644 seqr/migrations/0066_family_post_discovery_mondo_id.py diff --git a/CHANGELOG.md b/CHANGELOG.md index bd225f41f2..20802a294c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## dev * Adds external_data to Family model (REQUIRES DB MIGRATION) +* Adds post_discovery_mondo_id to Family model (REQUIRES DB MIGRATION) * Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION) * Enable "Reports" tab by default for local installations diff --git a/seqr/migrations/0066_family_post_discovery_mondo_id.py b/seqr/migrations/0066_family_post_discovery_mondo_id.py new file mode 100644 index 0000000000..84ca3ecbf6 --- /dev/null +++ b/seqr/migrations/0066_family_post_discovery_mondo_id.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.23 on 2024-05-22 15:37 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0065_family_external_data'), + ] + + operations = [ + migrations.AddField( + model_name='family', + name='post_discovery_mondo_id', + field=models.CharField(blank=True, max_length=30, null=True), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 7453ab2c9a..945cf17894 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -356,6 +356,7 @@ class Family(ModelWithGUID): coded_phenotype = models.TextField(null=True, blank=True) mondo_id = models.CharField(null=True, blank=True, max_length=30) + post_discovery_mondo_id = models.CharField(null=True, blank=True, max_length=30) post_discovery_omim_numbers = ArrayField(models.PositiveIntegerField(), default=list) pubmed_ids = ArrayField(models.TextField(), default=list) @@ -382,7 +383,7 @@ class Meta: 'post_discovery_omim_numbers', 'pedigree_dataset', 'coded_phenotype', 'mondo_id', ] internal_json_fields = [ - 'success_story_types', 'success_story', 'pubmed_ids', 'external_data', + 'success_story_types', 'success_story', 'pubmed_ids', 'external_data', 
'post_discovery_mondo_id', ] audit_fields = {'analysis_status'} From bb6042b7f2bfa3b2d0e87a1a169e13cfd3ef9365 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 11:54:55 -0400 Subject: [PATCH 166/736] add ui mondo field --- ui/shared/components/panel/family/Family.jsx | 26 ++++++++++++++------ ui/shared/utils/constants.js | 3 +++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/ui/shared/components/panel/family/Family.jsx b/ui/shared/components/panel/family/Family.jsx index e0ad7c88e8..edc9590776 100644 --- a/ui/shared/components/panel/family/Family.jsx +++ b/ui/shared/components/panel/family/Family.jsx @@ -26,6 +26,7 @@ import { FAMILY_FIELD_SUCCESS_STORY_TYPE, FAMILY_FIELD_FIRST_SAMPLE, FAMILY_FIELD_NAME_LOOKUP, + FAMILY_FIELD_DISCOVERY_MONDO_ID, FAMILY_FIELD_OMIM_NUMBERS, FAMILY_FIELD_PMIDS, FAMILY_FIELD_DESCRIPTION, FAMILY_FIELD_SUCCESS_STORY, FAMILY_NOTES_FIELDS, FAMILY_FIELD_CODED_PHENOTYPE, FAMILY_FIELD_INTERNAL_NOTES, FAMILY_FIELD_INTERNAL_SUMMARY, FAMILY_EXTERNAL_DATA_LOOKUP, @@ -59,6 +60,15 @@ const getNoteField = noteType => ({ ...BASE_NOTE_FIELD, }) +const MONDO_FIELD = { + component: SingleFieldView, + fieldDisplay: value => ( + + {value} + + ), +} + const FAMILY_FIELD_RENDER_LOOKUP = { [FAMILY_FIELD_ANALYSIS_GROUPS]: { canEdit: true, @@ -111,13 +121,13 @@ const FAMILY_FIELD_RENDER_LOOKUP = { }, [FAMILY_FIELD_CODED_PHENOTYPE]: { component: SingleFieldView, canEdit: true }, [FAMILY_FIELD_MONDO_ID]: { - component: SingleFieldView, + ...MONDO_FIELD, canEdit: true, - fieldDisplay: value => ( - - {value} - - ), + }, + [FAMILY_FIELD_DISCOVERY_MONDO_ID]: { + ...MONDO_FIELD, + internal: true, + canEditFamily: ({ postDiscoveryOmimOptions }) => Object.keys(postDiscoveryOmimOptions || {}).length > 0, }, [FAMILY_FIELD_OMIM_NUMBERS]: { canEditFamily: ({ postDiscoveryOmimOptions }) => Object.keys(postDiscoveryOmimOptions || {}).length > 0, @@ -197,8 +207,8 @@ class Family extends React.PureComponent { values => dispatchUpdateFamily({ 
...values, ...submitArgs }) : dispatchUpdateFamily return React.createElement(component || TextFieldView, { key: field.id, - isEditable: !disableEdit && ( - canEdit || (canEditFamily && canEditFamily(family)) || (!disableInternalEdit && internal)), + isEditable: !disableEdit && (canEditFamily ? canEditFamily(family) : + (canEdit || (!disableInternalEdit && internal))), isPrivate: internal, fieldName: compact ? null : name, field: field.id, diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 992f1aef7d..347e4ac7a9 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -259,6 +259,7 @@ export const FAMILY_FIELD_INTERNAL_SUMMARY = 'caseReviewSummary' export const FAMILY_FIELD_FIRST_SAMPLE = 'firstSample' export const FAMILY_FIELD_CODED_PHENOTYPE = 'codedPhenotype' export const FAMILY_FIELD_MONDO_ID = 'mondoId' +export const FAMILY_FIELD_DISCOVERY_MONDO_ID = 'postDiscoveryMondoId' export const FAMILY_FIELD_OMIM_NUMBERS = 'postDiscoveryOmimNumbers' export const FAMILY_FIELD_PMIDS = 'pubmedIds' export const FAMILY_FIELD_PEDIGREE = 'pedigreeImage' @@ -289,6 +290,7 @@ export const FAMILY_DETAIL_FIELDS = [ ...FAMILY_NOTES_FIELDS, { id: FAMILY_FIELD_CODED_PHENOTYPE }, { id: FAMILY_FIELD_MONDO_ID }, + { id: FAMILY_FIELD_DISCOVERY_MONDO_ID }, { id: FAMILY_FIELD_OMIM_NUMBERS }, { id: FAMILY_FIELD_PMIDS }, ] @@ -300,6 +302,7 @@ export const FAMILY_FIELD_NAME_LOOKUP = { [FAMILY_FIELD_MME_NOTES]: 'Matchmaker Notes', [FAMILY_FIELD_CODED_PHENOTYPE]: 'Phenotype Description', [FAMILY_FIELD_MONDO_ID]: 'MONDO ID', + [FAMILY_FIELD_DISCOVERY_MONDO_ID]: 'Post-discovery MONDO ID', [FAMILY_FIELD_OMIM_NUMBERS]: 'Post-discovery OMIM #', [FAMILY_FIELD_PMIDS]: 'Publications on this discovery', [FAMILY_FIELD_INTERNAL_NOTES]: 'Internal Notes', From ee31c989d8e604b33353dc0c07c1394bd6cfac18 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 11:59:13 -0400 Subject: [PATCH 167/736] proper conditional for mondo --- 
ui/shared/components/panel/family/Family.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/family/Family.jsx b/ui/shared/components/panel/family/Family.jsx index edc9590776..b6036941f6 100644 --- a/ui/shared/components/panel/family/Family.jsx +++ b/ui/shared/components/panel/family/Family.jsx @@ -127,7 +127,7 @@ const FAMILY_FIELD_RENDER_LOOKUP = { [FAMILY_FIELD_DISCOVERY_MONDO_ID]: { ...MONDO_FIELD, internal: true, - canEditFamily: ({ postDiscoveryOmimOptions }) => Object.keys(postDiscoveryOmimOptions || {}).length > 0, + canEditFamily: ({ discoveryTags }) => discoveryTags?.length > 0, }, [FAMILY_FIELD_OMIM_NUMBERS]: { canEditFamily: ({ postDiscoveryOmimOptions }) => Object.keys(postDiscoveryOmimOptions || {}).length > 0, From 4ce69b638a71d9e178c9d565330c4e1fb48beebe Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 12:06:02 -0400 Subject: [PATCH 168/736] use post discovery mondo id --- seqr/views/utils/anvil_metadata_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index b88762aaed..3ce2855ac3 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -109,7 +109,7 @@ def _get_family_metadata(family_filter, family_fields, include_metadata, include_mondo, format_id): family_data = Family.objects.filter(**family_filter).distinct().order_by('id').values( 'id', 'family_id', 'post_discovery_omim_numbers', - *(['mondo_id'] if include_mondo else []), + *(['post_discovery_mondo_id'] if include_mondo else []), internal_project_id=F('project__name'), pmid_id=Replace('pubmed_ids__0', Value('PMID:'), Value(''), output_field=CharField()), phenotype_description=Replace( @@ -511,9 +511,9 @@ def _get_condition_map(families): mondo_ids = set() for family in families: mim_numbers.update(family['post_discovery_omim_numbers']) - if family.get('mondo_id'): - 
family['mondo_id'] = f"MONDO:{family['mondo_id'].replace('MONDO:', '')}" - mondo_ids.add(family['mondo_id']) + if family.get('post_discovery_mondo_id'): + family['post_discovery_mondo_id'] = f"MONDO:{family['post_discovery_mondo_id'].replace('MONDO:', '')}" + mondo_ids.add(family['post_discovery_mondo_id']) omim_conditions_by_id_gene = defaultdict(lambda: defaultdict(list)) for omim in Omim.objects.filter(phenotype_mim_number__in=mim_numbers).values( @@ -543,7 +543,7 @@ def _get_mondo_condition_data(mondo_id): def _update_conditions(family_subject_row, variants, omim_conditions, mondo_conditions, set_conditions_for_variants): - mondo_id = family_subject_row.pop('mondo_id', None) + mondo_id = family_subject_row.pop('post_discovery_mondo_id', None) mim_numbers = family_subject_row.pop('post_discovery_omim_numbers') if mim_numbers: family_conditions = [] From bb943ec8d931984556cd70fdfe58ad3d234699ce Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 12:19:55 -0400 Subject: [PATCH 169/736] popualte mondo for findings table if no matched omim --- seqr/views/utils/anvil_metadata_utils.py | 55 +++++++++++------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 3ce2855ac3..7be57cd232 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -544,42 +544,37 @@ def _get_mondo_condition_data(mondo_id): def _update_conditions(family_subject_row, variants, omim_conditions, mondo_conditions, set_conditions_for_variants): mondo_id = family_subject_row.pop('post_discovery_mondo_id', None) - mim_numbers = family_subject_row.pop('post_discovery_omim_numbers') - if mim_numbers: - family_conditions = [] - for v in variants: - variant_conditions = [ - c for mim_number in mim_numbers for c in omim_conditions[mim_number][None] - if c['chrom'] == v['chrom'] and c['start'] <= v['pos'] <= c['end'] - ] + mondo_condition = 
{'condition_id': mondo_id, **mondo_conditions[mondo_id]} if mondo_id else {} + mim_numbers = family_subject_row.pop('post_discovery_omim_numbers') or [] + + family_conditions = [] + for v in variants: + variant_conditions = [ + c for mim_number in mim_numbers for c in omim_conditions[mim_number][None] + if c['chrom'] == v['chrom'] and c['start'] <= v['pos'] <= c['end'] + ] + for mim_number in mim_numbers: for gene_id in v['gene_ids']: - for mim_number in mim_numbers: - variant_conditions += omim_conditions[mim_number][gene_id] - - if set_conditions_for_variants: - v.update(_format_omim_conditions(variant_conditions)) - else: - family_conditions += variant_conditions + variant_conditions += omim_conditions[mim_number][gene_id] if set_conditions_for_variants: - return + conditions = _format_omim_conditions(variant_conditions) if variant_conditions else mondo_condition + v.update(conditions) + else: + family_conditions += variant_conditions - # Preferentially include conditions associated with discovery genes/regions, but fall back to all - if not family_conditions: - family_conditions = [ - c for mim_number in mim_numbers for conditions in omim_conditions[mim_number].values() for c in conditions - ] or [{'phenotype_mim_number': mim_number} for mim_number in mim_numbers] + if set_conditions_for_variants: + return - if family_conditions: - family_subject_row.update(_format_omim_conditions(family_conditions)) + # Preferentially include conditions associated with discovery genes/regions, but fall back to all + if not family_conditions: + family_conditions = [ + c for mim_number in mim_numbers for conditions in omim_conditions[mim_number].values() for c in conditions + ] or [{'phenotype_mim_number': mim_number} for mim_number in mim_numbers] - elif mondo_id: - mondo_condition = {'condition_id': mondo_id, **mondo_conditions[mondo_id]} - if set_conditions_for_variants: - for v in variants: - v.update(mondo_condition) - else: - family_subject_row.update(mondo_condition) + 
family_condition = _format_omim_conditions(family_conditions) if family_conditions else mondo_condition + if family_condition: + family_subject_row.update(family_condition) def _format_omim_conditions(conditions): From bda4683dfd77f3b4d0b08e05f2999e2070d61010 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 12:54:34 -0400 Subject: [PATCH 170/736] update unit tests --- seqr/fixtures/1kg_project.json | 6 ++++-- seqr/views/apis/report_api_tests.py | 7 ++++++- seqr/views/utils/test_utils.py | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 1b317dbf0a..61dbb4ffe9 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -150,7 +150,8 @@ "pedigree_image": "ped_2.png", "analysis_status": "Q", "coded_phenotype": "microcephaly, seizures", - "mondo_id": "MONDO:0044970", + "mondo_id": "MONDO:0044976", + "post_discovery_mondo_id": "MONDO:0044970", "case_review_notes": "
internal notes 2
\n
 
", "case_review_summary": "
internal case review summary 2
\n
 
" } @@ -334,7 +335,8 @@ "analysis_status": "Q", "success_story": "Published with Gleeson and Reza (PMID 31668703)", "success_story_types": ["C", "D"], - "mondo_id": "0008788", + "post_discovery_mondo_id": "0008788", + "post_discovery_omim_numbers": [616126], "case_review_notes": "
case review notes for family 12
\n
    \n
  • note1
  • \n
  • note 2
  • \n
  • note 3
  • \n
", "case_review_summary": "
summary for family 12
" } diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 6e99831378..9c58b75af1 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1117,6 +1117,9 @@ def test_family_metadata(self): 'displayName': '12', 'solve_status': 'Unsolved', 'actual_inheritance': 'unknown', + 'condition_id': 'OMIM:616126', + 'condition_inheritance': 'Autosomal recessive', + 'known_condition_name': 'Immunodeficiency 38', 'date_data_generation': '2017-02-05', 'data_type': 'WES', 'proband_id': 'NA20889', @@ -1293,7 +1296,9 @@ def test_variant_metadata(self): **BASE_VARIANT_METADATA_ROW, 'alt': None, 'chrom': '1', - 'condition_id': 'MONDO:0008788', + 'condition_id': 'OMIM:616126', + 'condition_inheritance': 'Autosomal recessive', + 'known_condition_name': 'Immunodeficiency 38', 'displayName': '12', 'end': 249045898, 'familyGuid': 'F000012_12', diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 3577daeac9..0145463bd5 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -750,7 +750,7 @@ def _get_list_param(call, param): 'caseReviewNotes', 'caseReviewSummary' } INTERNAL_FAMILY_FIELDS = { - 'individualGuids', 'successStory', 'successStoryTypes', 'pubmedIds', 'externalData', + 'individualGuids', 'successStory', 'successStoryTypes', 'pubmedIds', 'externalData', 'postDiscoveryMondoId' } INTERNAL_FAMILY_FIELDS.update(FAMILY_FIELDS) From b657ae14b876b78368befe6742d98eccb89ec189 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 12:58:49 -0400 Subject: [PATCH 171/736] update unit tests --- seqr/views/apis/family_api_tests.py | 2 +- seqr/views/apis/summary_data_api_tests.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 42b3b9332c..eb22e8e85d 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -121,7 +121,7 @@ 
def test_family_page_data(self): response_json = response.json() self.assertSetEqual(set(response_json.keys()), response_keys) self.assertSetEqual(set(response_json['familiesByGuid'].keys()), {'F000012_12'}) - self.assertListEqual(response_json['familiesByGuid']['F000012_12']['postDiscoveryOmimNumbers'], []) + self.assertListEqual(response_json['familiesByGuid']['F000012_12']['postDiscoveryOmimNumbers'], [616126]) self.assertDictEqual(response_json['familiesByGuid']['F000012_12']['postDiscoveryOmimOptions'], {'616126': { 'phenotypeMimNumber': 616126, 'phenotypes': [{ 'chrom': '1', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 6a4a040932..06a1eedbfe 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -107,6 +107,9 @@ 'notes-2': None, 'tags-1': ['Tier 1 - Novel gene and phenotype'], 'tags-2': ['Tier 1 - Novel gene and phenotype'], + 'condition_id': 'OMIM:616126', + 'condition_inheritance': 'Autosomal recessive', + 'known_condition_name': 'Immunodeficiency 38', } EXPECTED_SAMPLE_METADATA_ROW = { "dbgap_submission": "No", From 6118736dc3c0167ef213191a438d1f2705335a9f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 13:57:51 -0400 Subject: [PATCH 172/736] show transcriptId with version --- ui/shared/components/panel/variants/VariantUtils.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/VariantUtils.jsx b/ui/shared/components/panel/variants/VariantUtils.jsx index 82cc5a0b21..663ff8b58c 100644 --- a/ui/shared/components/panel/variants/VariantUtils.jsx +++ b/ui/shared/components/panel/variants/VariantUtils.jsx @@ -13,7 +13,7 @@ const SequenceContainer = styled.span` export const TranscriptLink = styled.a.attrs(({ variant, transcript }) => ({ target: '_blank', href: `http://${variant.genomeVersion === GENOME_VERSION_37 ? 
'grch37' : 'useast'}.ensembl.org/Homo_sapiens/Transcript/Summary?t=${transcript.transcriptId}`, - children: transcript.transcriptId, + children: transcript.hgvsc?.startsWith(transcript.transcriptId) ? transcript.hgvsc.split(':')[0] : transcript.transcriptId, }))` font-size: 1.3em; font-weight: normal; From ce7064ac39999ab61cb4610213d2a2aefa1f31ab Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 22 May 2024 14:07:53 -0400 Subject: [PATCH 173/736] fix js bug during page load --- ui/pages/Project/selectors.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index 666756d7ee..9f01266014 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -406,7 +406,7 @@ export const getVisibleFamilies = createSelector( return familyFilter ? searchedFamilies.filter(family => familyFilter({ ...family, - individuals: family?.individualGuids.map(individualGuid => (individualsByGuid[individualGuid])), + individuals: family?.individualGuids?.map(individualGuid => (individualsByGuid[individualGuid])), })) : searchedFamilies }, ) From 777c68e3c11f359c5476f35024f6896ae0418b7e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 24 May 2024 11:42:11 -0400 Subject: [PATCH 174/736] bumop changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20802a294c..64626f8078 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # _seqr_ Changes ## dev + +## 5/24/24 * Adds external_data to Family model (REQUIRES DB MIGRATION) * Adds post_discovery_mondo_id to Family model (REQUIRES DB MIGRATION) * Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION) From b76aa0cef9ca8c6337ce16d4bdc1cf2ce42511f7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 24 May 2024 11:51:08 -0400 Subject: [PATCH 175/736] codacy cleanup --- seqr/views/apis/project_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 30a1b61182..31cddd687f 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -6,7 +6,7 @@ from collections import defaultdict from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import PermissionDenied -from django.db.models import Count, Max, Q, F, Case, When, Value +from django.db.models import Count, Max, Q, F, When, Value from django.db.models.functions import JSONObject, TruncDate from django.utils import timezone from notifications.models import Notification From f0130ef84c178813d01420a8183675772cbb747a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 24 May 2024 12:07:04 -0400 Subject: [PATCH 176/736] codacy cleanup --- seqr/views/apis/project_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py index 31cddd687f..e232f8fb58 100644 --- a/seqr/views/apis/project_api.py +++ b/seqr/views/apis/project_api.py @@ -6,7 +6,7 @@ from collections import defaultdict from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import PermissionDenied -from django.db.models import Count, Max, Q, F, When, Value +from django.db.models import Count, Max, Q, F, Value from django.db.models.functions import JSONObject, TruncDate from django.utils import timezone from notifications.models import Notification From b626ae646d937d36bb39614725355a6f818b34eb Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 27 May 2024 16:51:49 -0400 Subject: [PATCH 177/736] update image tags in dev-broad-seqr chart --- .github/workflows/dev-hail-search-release.yaml | 4 ++-- .github/workflows/dev-release.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/dev-hail-search-release.yaml b/.github/workflows/dev-hail-search-release.yaml index 18bad94549..9507e2b93d 100644 --- 
a/.github/workflows/dev-hail-search-release.yaml +++ b/.github/workflows/dev-hail-search-release.yaml @@ -47,11 +47,11 @@ jobs: persist-credentials: false fetch-depth: 0 - - name: update image tag in the broad seqr chart + - name: update image tag in the dev broad seqr chart uses: mikefarah/yq@v4.22.1 with: cmd: > - yq -i '.hail-search.image.tag = "${{ github.event.workflow_run.head_sha }}"' charts/broad-seqr/values-dev.yaml + yq -i '.hail-search.image.tag = "${{ github.event.workflow_run.head_sha }}"' charts/dev-broad-seqr/values.yaml - name: Commit and Push changes uses: Andro999b/push@v1.3 diff --git a/.github/workflows/dev-release.yaml b/.github/workflows/dev-release.yaml index 7df887327d..193110b0d0 100644 --- a/.github/workflows/dev-release.yaml +++ b/.github/workflows/dev-release.yaml @@ -47,11 +47,11 @@ jobs: persist-credentials: false fetch-depth: 0 - - name: update image tag in the broad seqr chart + - name: update image tag in the dev broad seqr chart uses: mikefarah/yq@v4.22.1 with: cmd: > - yq -i '.seqr.image.tag = "${{ github.event.workflow_run.head_sha }}"' charts/broad-seqr/values-dev.yaml + yq -i '.seqr.image.tag = "${{ github.event.workflow_run.head_sha }}"' charts/dev-broad-seqr/values.yaml - name: Commit and Push changes uses: Andro999b/push@v1.3 From 0b6d5fab730b4b6a95c447ea5ea4e3d433c08053 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 28 May 2024 13:27:48 -0400 Subject: [PATCH 178/736] limit concurrent requests during RNA loading --- ui/pages/DataManagement/reducers.js | 35 ++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/ui/pages/DataManagement/reducers.js b/ui/pages/DataManagement/reducers.js index ba672bfef0..3d58424007 100644 --- a/ui/pages/DataManagement/reducers.js +++ b/ui/pages/DataManagement/reducers.js @@ -54,7 +54,17 @@ export const uploadQcPipelineOutput = values => submitRequest( export const deleteEsIndex = index => submitRequest('delete_index', RECEIVE_ELASTICSEARCH_STATUS, { 
index }) -const loadMultipleData = (path, getUpdateData, dispatchType, formatSuccessMessage) => values => (dispatch) => { +const executeMultipleRequests = (requests, onSuccess, warnings) => Promise.all(requests.map( + ([entityUrl, entityId, body]) => new HttpRequestHelper( + entityUrl, + onSuccess, + e => warnings.push(`Error loading ${entityId}: ${e.body && e.body.error ? e.body.error : e.message}`), + ).post(body), +)) + +const loadMultipleData = ( + path, getUpdateData, dispatchType, formatSuccessMessage, maxConcurrentRequests = 50, +) => values => (dispatch) => { let successResponseJson = null return new HttpRequestHelper( `/api/data_management/${path}`, @@ -64,15 +74,19 @@ const loadMultipleData = (path, getUpdateData, dispatchType, formatSuccessMessag ).post(values).then(() => { const { info, warnings } = successResponseJson let numLoaded = 0 - return Promise.all(getUpdateData(successResponseJson, values).map( - ([entityUrl, entityId, body]) => new HttpRequestHelper( - entityUrl, - () => { - numLoaded += 1 - }, - e => warnings.push(`Error loading ${entityId}: ${e.body && e.body.error ? 
e.body.error : e.message}`), - ).post(body), - )).then(() => { + const updateData = getUpdateData(successResponseJson, values) + return updateData.reduce((prevPromise, item, index) => { + if (index % maxConcurrentRequests === 0) { + return prevPromise.then(() => executeMultipleRequests( + updateData.slice(index, index + maxConcurrentRequests), + () => { + numLoaded += 1 + }, + warnings, + )) + } + return prevPromise + }, Promise.resolve()).then(() => { info.push(formatSuccessMessage(numLoaded)) dispatch({ type: dispatchType, newValue: { info, warnings } }) }) @@ -86,6 +100,7 @@ export const uploadRnaSeq = loadMultipleData( ])), RECEIVE_RNA_SEQ_UPLOAD_STATS, numLoaded => `Successfully loaded data for ${numLoaded} RNA-seq samples`, + 10, ) export const addIgv = loadMultipleData( From 346ef0c45c2f75058902b0e1313c711765474c88 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 28 May 2024 13:53:31 -0400 Subject: [PATCH 179/736] add tenacity retry to panelapp function --- panelapp/pa_locus_list_api_tests.py | 25 +++++++++++++++++++++++++ panelapp/panelapp_utils.py | 7 +++++++ requirements.in | 1 + requirements.txt | 2 ++ 4 files changed, 35 insertions(+) diff --git a/panelapp/pa_locus_list_api_tests.py b/panelapp/pa_locus_list_api_tests.py index 9c1acd2555..fa121c76b9 100644 --- a/panelapp/pa_locus_list_api_tests.py +++ b/panelapp/pa_locus_list_api_tests.py @@ -1,9 +1,15 @@ import json +from collections import defaultdict + import mock import responses +import tenacity from django.core.management import call_command, CommandError from django.urls.base import reverse +from requests import Response +from urllib3.exceptions import MaxRetryError +from panelapp.panelapp_utils import _get_all_genes from seqr.views.apis.locus_list_api import locus_lists, locus_list_info from seqr.views.apis.locus_list_api_tests import BaseLocusListAPITest from seqr.views.utils.test_utils import AuthenticationTestCase, LOCUS_LIST_FIELDS @@ -165,3 +171,22 @@ def 
test_delete_all_panels(self): self.assertEqual(response.status_code, 200) locus_lists_dict = response.json()['locusListsByGuid'] self.assertSetEqual(set(locus_lists_dict.keys()), {LOCUS_LIST_GUID}) + + @mock.patch("panelapp.panelapp_utils.requests.get") + def test_get_all_genes_exhausts_retries(self, mock_get_request): + url = '{}/genes/?page=1'.format(PANEL_APP_API_URL_UK) + request_error = MaxRetryError(pool=mock.MagicMock(), url=url) + mock_get_request.side_effect = [request_error] * 5 + with self.assertRaises(tenacity.RetryError): + _get_all_genes(url, defaultdict(list)) + + @mock.patch("panelapp.panelapp_utils.requests.get") + def test_get_all_genes_retries_success(self, mock_get_request): + url = '{}/genes/?page=1'.format(PANEL_APP_API_URL_UK) + valid_response = Response() + valid_response.status_code = 200 + valid_response._content = b'{"results": [{"panel": {"id": 1207, "name": "Acute intermittent porphyria"}}]}' + request_error = MaxRetryError(pool=mock.MagicMock(), url=url) + mock_get_request.side_effect = [request_error] * 4 + [valid_response] + expected_res = {1207: [{'panel': {'id': 1207, 'name': 'Acute intermittent porphyria'}}]} + self.assertEqual(_get_all_genes(url, defaultdict(list)), expected_res) diff --git a/panelapp/panelapp_utils.py b/panelapp/panelapp_utils.py index edad4c9ec5..ef6e778b24 100644 --- a/panelapp/panelapp_utils.py +++ b/panelapp/panelapp_utils.py @@ -3,6 +3,8 @@ import requests from django.db import transaction from django.utils import timezone +from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type +from urllib3.exceptions import MaxRetryError from panelapp.models import PaLocusList, PaLocusListGene from seqr.models import LocusList as SeqrLocusList, LocusListGene as SeqrLocusListGene @@ -113,6 +115,11 @@ def _get_all_panels(panels_url, all_results): return _get_all_panels(next_page, all_results) +@retry( + retry=retry_if_exception_type(MaxRetryError), + wait=wait_exponential(multiplier=1, 
min=4, max=10), + stop=stop_after_attempt(5), +) def _get_all_genes(genes_url: str, results_by_panel_id: dict): resp = requests.get(genes_url, timeout=REQUEST_TIMEOUT_S) resp_json = resp.json() diff --git a/requirements.in b/requirements.in index 647c5c36a7..9ddc62799f 100644 --- a/requirements.in +++ b/requirements.in @@ -25,3 +25,4 @@ tqdm # convenient way to create progress bar for lo whitenoise feedparser markdownify +tenacity diff --git a/requirements.txt b/requirements.txt index 916ad0153b..d825a8bb0c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -171,6 +171,8 @@ sqlparse==0.5.0 # via django swapper==1.3.0 # via django-notifications-hq +tenacity==8.3.0 + # via -r requirements.in tqdm==4.66.3 # via -r requirements.in urllib3==1.26.18 From 27f1a7779948280cfc28a9690cab5b1403463bc2 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 28 May 2024 15:51:24 -0400 Subject: [PATCH 180/736] conditioanlly persist temp files to gs --- seqr/views/apis/data_manager_api.py | 14 ++++----- seqr/views/utils/file_utils.py | 46 ++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index b3e4f91d72..a8b5cbc5a6 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -24,7 +24,7 @@ from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.dataset_utils import load_rna_seq, load_phenotype_prioritization_data_file, RNA_DATA_TYPE_CONFIGS, \ post_process_rna_data -from seqr.views.utils.file_utils import parse_file, get_temp_upload_directory, load_uploaded_file +from seqr.views.utils.file_utils import parse_file, get_temp_file_path, load_uploaded_file, persist_temp_file from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.json_to_orm_utils import update_model_from_json from seqr.views.utils.permissions_utils import data_manager_required, pm_or_data_manager_required, 
get_internal_projects @@ -35,8 +35,6 @@ logger = SeqrLogger(__name__) -TEMP_GS_BUCKET = 'gs://seqr-scratch-temp' - @data_manager_required def elasticsearch_status(request): @@ -276,7 +274,7 @@ def update_rna_seq(request): mapping_file = load_uploaded_file(uploaded_mapping_file_id) file_name_prefix = f'rna_sample_data__{data_type}__{datetime.now().isoformat()}' - file_dir = os.path.join(get_temp_upload_directory(), file_name_prefix) + file_dir = get_temp_file_path(file_name_prefix) os.mkdir(file_dir) sample_files = {} @@ -302,7 +300,7 @@ def _save_sample_data(sample_key, sample_data): ) if sample_guids_to_keys: - mv_file_to_gs(f'{file_dir}/*', f'{TEMP_GS_BUCKET}/{file_name_prefix}', request.user) + persist_temp_file(file_name_prefix, request.user, is_directory=True) return create_json_response({ 'info': info, @@ -326,9 +324,9 @@ def load_rna_seq_sample_data(request, sample_guid): data_type = request_json['dataType'] config = RNA_DATA_TYPE_CONFIGS[data_type] - gs_file_name = f'{TEMP_GS_BUCKET}/{file_name}/{sample_guid}.json.gz' - if does_file_exist(gs_file_name, user=request.user): - data_rows = [json.loads(line) for line in file_iter(gs_file_name, user=request.user)] + file_path = get_temp_file_path(f'{file_name}/{sample_guid}.json.gz') + if does_file_exist(file_path, user=request.user): + data_rows = [json.loads(line) for line in file_iter(file_path, user=request.user)] data_rows, error = post_process_rna_data(sample_guid, data_rows, **config.get('post_process_kwargs', {})) else: logger.error(f'No saved temp data found for {sample_guid} with file prefix {file_name}', request.user) diff --git a/seqr/views/utils/file_utils.py b/seqr/views/utils/file_utils.py index 5c562c67be..bec83883b7 100644 --- a/seqr/views/utils/file_utils.py +++ b/seqr/views/utils/file_utils.py @@ -9,11 +9,15 @@ import tempfile import openpyxl as xl +from seqr.utils.file_utils import mv_file_to_gs, file_iter from seqr.views.utils.json_utils import create_json_response from 
seqr.views.utils.permissions_utils import login_and_policies_required +from seqr.views.utils.terra_api_utils import anvil_enabled logger = logging.getLogger(__name__) +TEMP_GS_BUCKET = 'gs://seqr-scratch-temp' + @login_and_policies_required def save_temp_file(request): @@ -77,22 +81,25 @@ def _parse_excel_string_cell(cell): cell_value = '{:.0f}'.format(cell_value) return cell_value or '' -def get_temp_upload_directory(): + +def get_temp_file_path(file_name, is_local=None): + if is_local is None: + is_local = not anvil_enabled() + if not is_local: + return f'{TEMP_GS_BUCKET}/{file_name}' + upload_directory = os.path.join(tempfile.gettempdir(), 'temp_uploads') if not os.path.isdir(upload_directory): - logger.debug("Creating directory: " + upload_directory) os.makedirs(upload_directory) - return upload_directory -def _compute_serialized_file_path(uploaded_file_id): - """Compute local file path, and make sure the directory exists""" + return os.path.join(upload_directory, file_name) - upload_directory = get_temp_upload_directory() - return os.path.join(upload_directory, "temp_upload_{}.json.gz".format(uploaded_file_id)) +def _compute_serialized_file_name(uploaded_file_id): + return f'temp_upload_{uploaded_file_id}.json.gz' -def save_uploaded_file(request, process_records=None, allow_json=False): +def save_uploaded_file(request, user, process_records=None, allow_json=False): if len(request.FILES) != 1: raise ValueError("Received %s files instead of 1" % len(request.FILES)) @@ -110,16 +117,27 @@ def save_uploaded_file(request, process_records=None, allow_json=False): # save json to temporary file uploaded_file_id = hashlib.md5(str(json_records).encode('utf-8')).hexdigest() # nosec - serialized_file_path = _compute_serialized_file_path(uploaded_file_id) + file_name = _compute_serialized_file_name(uploaded_file_id) + serialized_file_path = get_temp_file_path(file_name, is_local=True) with gzip.open(serialized_file_path, 'wt') as f: json.dump(json_records, f) + 
persist_temp_file(file_name, user) + return uploaded_file_id, filename, json_records -def load_uploaded_file(upload_file_id): - serialized_file_path = _compute_serialized_file_path(upload_file_id) - with gzip.open(serialized_file_path, "rt") as f: - json_records = json.load(f) +def persist_temp_file(file_name, user, is_directory=False): + if not anvil_enabled(): + return + + src_path = get_temp_file_path(file_name, is_local=True) + dest_path = get_temp_file_path(file_name, is_local=False) + if is_directory: + src_path = f'{src_path}/*' + mv_file_to_gs(src_path, dest_path, user) - return json_records + +def load_uploaded_file(upload_file_id): + serialized_file_path = get_temp_file_path(_compute_serialized_file_name(upload_file_id)) + return json.loads(next(file_iter(serialized_file_path))) From e93540ea4f7f0697d2182e3d862b749807e69b2e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 28 May 2024 17:09:01 -0400 Subject: [PATCH 181/736] add tests for data manager api --- seqr/views/apis/data_manager_api.py | 4 +- seqr/views/apis/data_manager_api_tests.py | 127 +++++++++++++++++----- 2 files changed, 99 insertions(+), 32 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index a8b5cbc5a6..5b07ab4551 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -16,7 +16,7 @@ from seqr.utils.communication_utils import send_project_notification from seqr.utils.search.utils import get_search_backend_status, delete_search_backend_data -from seqr.utils.file_utils import file_iter, does_file_exist, mv_file_to_gs +from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.logging_utils import SeqrLogger from seqr.utils.vcf_utils import validate_vcf_exists @@ -274,7 +274,7 @@ def update_rna_seq(request): mapping_file = load_uploaded_file(uploaded_mapping_file_id) file_name_prefix = f'rna_sample_data__{data_type}__{datetime.now().isoformat()}' - file_dir = 
get_temp_file_path(file_name_prefix) + file_dir = get_temp_file_path(file_name_prefix, is_local=True) os.mkdir(file_dir) sample_files = {} diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index ddae103c5b..afb5c69aec 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -11,7 +11,7 @@ update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, write_pedigree, validate_callset, \ get_loaded_projects, load_data from seqr.views.utils.orm_to_json_utils import _get_json_for_models -from seqr.views.utils.test_utils import AuthenticationTestCase, AirflowTestCase, AirtableTest +from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase, AirflowTestCase, AirtableTest from seqr.utils.search.elasticsearch.es_utils_tests import urllib3_responses from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier, Sample, Project, PhenotypePrioritization from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL @@ -439,8 +439,7 @@ @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') -class DataManagerAPITest(AuthenticationTestCase, AirtableTest): - fixtures = ['users', '1kg_project', 'reference_data'] +class DataManagerAPITest(AirtableTest): @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @urllib3_responses.activate @@ -879,7 +878,7 @@ def test_update_rna_splice_outlier(self, *args, **kwargs): @mock.patch('seqr.views.utils.dataset_utils.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.views.utils.dataset_utils.SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL', 'seqr-data-loading') - @mock.patch('seqr.views.apis.data_manager_api.get_temp_upload_directory', lambda: 'tmp/') + @mock.patch('seqr.views.utils.file_utils.tempfile.gettempdir', lambda: 'tmp/') @mock.patch('seqr.views.utils.dataset_utils.safe_post_to_slack') 
@mock.patch('seqr.views.apis.data_manager_api.datetime') @mock.patch('seqr.views.apis.data_manager_api.os.mkdir') @@ -1054,15 +1053,18 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s # test correct file interactions file_path = RNA_FILENAME_TEMPLATE.format(data_type) - mock_subprocess.assert_has_calls([mock.call(command, stdout=-1, stderr=-2, shell=True) for command in [ # nosec + expected_subprocess_calls = [ f'gsutil ls {RNA_FILE_ID}', f'gsutil cat {RNA_FILE_ID} | gunzip -c -q - ', - f'gsutil mv tmp/{file_path}/* gs://seqr-scratch-temp/{file_path}', - ]]) - mock_mkdir.assert_called_with(f'tmp/{file_path}') - filename = f'tmp/{file_path}/{new_sample_guid}.json.gz' + ] + self._additional_expected_loading_subprocess_calls(file_path) + self.assertEqual(mock_subprocess.call_count, len(expected_subprocess_calls)) + mock_subprocess.assert_has_calls([ + mock.call(command, stdout=-1, stderr=-2, shell=True) for command in expected_subprocess_calls # nosec + ]) + mock_mkdir.assert_any_call(f'tmp/temp_uploads/{file_path}') + filename = f'tmp/temp_uploads/{file_path}/{new_sample_guid}.json.gz' expected_files = { - f'tmp/{file_path}/{new_sample_guid if sample_guid == PLACEHOLDER_GUID else sample_guid}.json.gz': data + f'tmp/temp_uploads/{file_path}/{new_sample_guid if sample_guid == PLACEHOLDER_GUID else sample_guid}.json.gz': data for sample_guid, data in params['parsed_file_data'].items() } self.assertIn(filename, expected_files) @@ -1103,7 +1105,7 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s self.assertTrue(second_tissue_sample_guid != new_sample_guid) self.assertTrue(second_tissue_sample_guid in response_json['sampleGuids']) self._assert_expected_file_open(mock_rename, mock_open, [ - f'tmp/{RNA_FILENAME_TEMPLATE.format(data_type)}/{sample_guid}.json.gz' + f'tmp/temp_uploads/{RNA_FILENAME_TEMPLATE.format(data_type)}/{sample_guid}.json.gz' for sample_guid in response_json['sampleGuids'] ]) 
self.assertSetEqual( @@ -1111,14 +1113,20 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s params['write_data'], ) + @staticmethod + def _additional_expected_loading_subprocess_calls(file_path): + return [] + + def _get_expected_read_file_subprocess_calls(self, file_name, sample_guid): + return [] + def _assert_expected_file_open(self, mock_rename, mock_open, expected_file_names): file_rename = {call.args[1]: call.args[0] for call in mock_rename.call_args_list} self.assertSetEqual(set(expected_file_names), set(file_rename.keys())) mock_open.assert_has_calls([mock.call(file_rename[filename], 'at') for filename in expected_file_names]) return file_rename - @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_load_rna_seq_sample_data(self, mock_subprocess): + def test_load_rna_seq_sample_data(self): url = reverse(load_rna_seq_sample_data, args=[RNA_MUSCLE_SAMPLE_GUID]) self.check_pm_login(url) @@ -1132,12 +1140,8 @@ def test_load_rna_seq_sample_data(self, mock_subprocess): self.reset_logs() parsed_file_lines = params['parsed_file_data'][sample_guid].strip().split('\n') - mock_does_file_exist = mock.MagicMock() - mock_does_file_exist.wait.return_value = 1 - mock_does_file_exist.stdout = [b'CommandException: One or more URLs matched no objects'] - mock_subprocess.side_effect = [mock_does_file_exist] - file_name = RNA_FILENAME_TEMPLATE.format(data_type) + not_found_logs = self._set_file_not_found(file_name, sample_guid) body = {'fileName': file_name, 'dataType': data_type} response = self.client.post(url, content_type='application/json', data=json.dumps(body)) @@ -1145,17 +1149,14 @@ def test_load_rna_seq_sample_data(self, mock_subprocess): self.assertDictEqual(response.json(), {'error': 'Data for this sample was not properly parsed. 
Please re-upload the data'}) self.assert_json_logs(self.pm_user, [ (f'Loading outlier data for {params["loaded_data_row"][0]}', None), - (f'==> gsutil ls gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz', None), - ('CommandException: One or more URLs matched no objects', None), + *not_found_logs, (f'No saved temp data found for {sample_guid} with file prefix {file_name}', { 'severity': 'ERROR', '@type': 'type.googleapis.com/google.devtools.clouderrorreporting.v1beta1.ReportedErrorEvent', }), ]) - mock_does_file_exist.wait.return_value = 0 - mock_file_iter = mock.MagicMock() - mock_file_iter.stdout = [row.encode('utf-8') for row in parsed_file_lines] - mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter] + self._add_file_iter([row.encode('utf-8') for row in parsed_file_lines]) + self.reset_logs() response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 200) @@ -1167,13 +1168,11 @@ def test_load_rna_seq_sample_data(self, mock_subprocess): self.assertSetEqual({model.sample.guid for model in models}, {sample_guid}) self.assertTrue(all(model.sample.is_active for model in models)) - gsutil_cat = f'gsutil cat gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz | gunzip -c -q - ' - mock_subprocess.assert_called_with(gsutil_cat, stdout=-1, stderr=-2, shell=True) # nosec + subprocess_logs = self._get_expected_read_file_subprocess_calls(file_name, sample_guid) self.assert_json_logs(self.pm_user, [ (f'Loading outlier data for {params["loaded_data_row"][0]}', None), - (f'==> gsutil ls gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz', None), - (f'==> {gsutil_cat}', None), + *subprocess_logs, (f'create {model_cls.__name__}s', {'dbUpdate': { 'dbEntity': model_cls.__name__, 'numEntities': num_models, 'parentEntityIds': [sample_guid], 'updateType': 'bulk_create', @@ -1183,8 +1182,7 @@ def test_load_rna_seq_sample_data(self, mock_subprocess): 
self.assertListEqual(list(params['get_models_json'](models)), params['expected_models_json']) mismatch_row = {**json.loads(parsed_file_lines[0]), params.get('mismatch_field', 'p_value'): '0.05'} - mock_file_iter.stdout += [json.dumps(mismatch_row).encode('utf-8')] - mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter] + self._add_file_iter([json.dumps(mismatch_row).encode('utf-8')]) response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 400) self.assertDictEqual(response.json(), { @@ -1471,6 +1469,75 @@ def test_get_loaded_projects(self): self.assertDictEqual(response.json(), {'projects': [EMPTY_PROJECT_OPTION]}) +class LocalDataManagerAPITest(AuthenticationTestCase, DataManagerAPITest): + fixtures = ['users', '1kg_project', 'reference_data'] + + def setUp(self): + patcher = mock.patch('seqr.utils.file_utils.os.path.isfile') + self.mock_does_file_exist = patcher.start() + self.addCleanup(patcher.stop) + patcher = mock.patch('seqr.utils.file_utils.gzip.open') + self.mock_open = patcher.start() + self.mock_file_iter = self.mock_open.return_value.__enter__.return_value.__iter__ + self.mock_file_iter.return_value = [] + self.addCleanup(patcher.stop) + super().setUp() + + def _set_file_not_found(self, file_name, sample_guid): + self.mock_does_file_exist.return_value = False + self.mock_file_iter.return_value = [] + return [] + + def _add_file_iter(self, stdout): + self.mock_does_file_exist.return_value = True + self.mock_file_iter.return_value += stdout + + +class AnvilDataManagerAPITest(AnvilAuthenticationTestCase, DataManagerAPITest): + fixtures = ['users', '1kg_project', 'reference_data'] + + def setUp(self): + patcher = mock.patch('seqr.utils.file_utils.subprocess.Popen') + self.mock_subprocess = patcher.start() + self.mock_does_file_exist = mock.MagicMock() + self.mock_file_iter = mock.MagicMock() + self.mock_file_iter.stdout = [] + self.mock_subprocess.side_effect = 
[self.mock_does_file_exist, self.mock_file_iter] + self.addCleanup(patcher.stop) + super().setUp() + + def _set_file_not_found(self, file_name, sample_guid): + self.mock_file_iter.stdout = [] + self.mock_does_file_exist.wait.return_value = 1 + self.mock_does_file_exist.stdout = [b'CommandException: One or more URLs matched no objects'] + self.mock_subprocess.side_effect = [self.mock_does_file_exist] + return [ + (f'==> gsutil ls gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz', None), + ('CommandException: One or more URLs matched no objects', None), + ] + + def _add_file_iter(self, stdout): + self.mock_does_file_exist.wait.return_value = 0 + self.mock_file_iter.stdout += stdout + self.mock_subprocess.side_effect = [self.mock_does_file_exist, self.mock_file_iter] + + def _get_expected_read_file_subprocess_calls(self, file_name, sample_guid): + gsutil_cat = f'gsutil cat gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz | gunzip -c -q - ' + self.mock_subprocess.assert_called_with(gsutil_cat, stdout=-1, stderr=-2, shell=True) # nosec + return [ + (f'==> gsutil ls gs://seqr-scratch-temp/{file_name}/{sample_guid}.json.gz', None), + (f'==> {gsutil_cat}', None), + ] + + @staticmethod + def _additional_expected_loading_subprocess_calls(file_path): + return [f'gsutil mv tmp/temp_uploads/{file_path}/* gs://seqr-scratch-temp/{file_path}'] + + def test_get_loaded_projects(self, *args, **kwargs): + # Test relies on the local-only project data, and has no real difference for local/ non-local behavior + pass + + @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') class LoadDataAPITest(AirflowTestCase): fixtures = ['users', 'social_auth', '1kg_project'] From 6b861dc62132d68243546466ab0d24ae398f9128 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 28 May 2024 17:09:50 -0400 Subject: [PATCH 182/736] fix syntax --- seqr/views/utils/file_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/seqr/views/utils/file_utils.py b/seqr/views/utils/file_utils.py index bec83883b7..5c08c7e2e8 100644 --- a/seqr/views/utils/file_utils.py +++ b/seqr/views/utils/file_utils.py @@ -99,7 +99,7 @@ def _compute_serialized_file_name(uploaded_file_id): return f'temp_upload_{uploaded_file_id}.json.gz' -def save_uploaded_file(request, user, process_records=None, allow_json=False): +def save_uploaded_file(request, process_records=None, allow_json=False): if len(request.FILES) != 1: raise ValueError("Received %s files instead of 1" % len(request.FILES)) @@ -122,7 +122,7 @@ def save_uploaded_file(request, user, process_records=None, allow_json=False): with gzip.open(serialized_file_path, 'wt') as f: json.dump(json_records, f) - persist_temp_file(file_name, user) + persist_temp_file(file_name, request.user) return uploaded_file_id, filename, json_records From f035ba0057ea484d989a89a7a80e623103389018 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 11:19:36 -0400 Subject: [PATCH 183/736] test gs file interactions for individual temp files --- seqr/views/apis/individual_api_tests.py | 37 +++++++++++++++++++++++++ seqr/views/utils/file_utils_tests.py | 1 + 2 files changed, 38 insertions(+) diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py index b83b2d47a2..1a4c01a700 100644 --- a/seqr/views/apis/individual_api_tests.py +++ b/seqr/views/apis/individual_api_tests.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- import datetime +import gzip import json import mock +import re from copy import deepcopy from django.core.files.uploadedfile import SimpleUploadedFile @@ -1321,6 +1323,14 @@ class LocalIndividualAPITest(AuthenticationTestCase, IndividualAPITest): fixtures = ['users', '1kg_project', 'reference_data'] HAS_EXTERNAL_PROJECT_ACCESS = False + def setUp(self): + patcher = mock.patch('seqr.utils.file_utils.subprocess.Popen') + _mock_subprocess = patcher.start() + _mock_subprocess.side_effect = Exception('Calling gs from local') 
+ self.addCleanup(patcher.stop) + + super().setUp() + def test_import_gregor_metadata(self, *args): # Importing gregor metadata does not work in local environment pass @@ -1329,3 +1339,30 @@ def test_import_gregor_metadata(self, *args): class AnvilIndividualAPITest(AnvilAuthenticationTestCase, IndividualAPITest): fixtures = ['users', 'social_auth', '1kg_project', 'reference_data'] HAS_EXTERNAL_PROJECT_ACCESS = True + + def setUp(self): + patcher = mock.patch('seqr.utils.file_utils.subprocess.Popen') + _mock_subprocess = patcher.start() + self.addCleanup(patcher.stop) + + self.mock_subprocess = mock.MagicMock() + self.mock_subprocess.wait.return_value = 0 + self.mock_subprocess.stdout.__iter__.return_value = [] + self.gs_files = {} + _mock_subprocess.side_effect = self._mock_subprocess + + super().setUp() + + def _mock_subprocess(self, command, **kwargs): + command_args = re.match( + r'gsutil (?Pcat|mv)(?P \S+)? gs://seqr-scratch-temp/(?P\S+)', command, + ).groupdict() + file_name = command_args['gs_path'] + if command_args['cmd'] == 'mv': + src_path = command_args['local_path'].strip() + self.assertEqual(src_path.split('/')[-1], file_name) + with gzip.open(src_path) as f: + self.gs_files[file_name] = f.readlines() + else: + self.mock_subprocess.stdout.__iter__.return_value = self.gs_files[file_name] + return self.mock_subprocess diff --git a/seqr/views/utils/file_utils_tests.py b/seqr/views/utils/file_utils_tests.py index 0bc80c59a3..02304f3259 100644 --- a/seqr/views/utils/file_utils_tests.py +++ b/seqr/views/utils/file_utils_tests.py @@ -59,6 +59,7 @@ def _mock_cell(value): class FileUtilsTest(AuthenticationTestCase): fixtures = ['users'] + # TODO test persist_temp_file def test_temp_file_upload(self): url = reverse(save_temp_file) self.check_require_login(url) From 268a0af1a5b2df79103c49c19916323c7a5fe15a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 11:50:25 -0400 Subject: [PATCH 184/736] test generic save temp file endpoint --- 
seqr/views/utils/file_utils_tests.py | 42 ++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/seqr/views/utils/file_utils_tests.py b/seqr/views/utils/file_utils_tests.py index 02304f3259..2ac3b8b8b6 100644 --- a/seqr/views/utils/file_utils_tests.py +++ b/seqr/views/utils/file_utils_tests.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from io import StringIO +import gzip import mock import openpyxl as xl @@ -8,8 +9,8 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.urls.base import reverse -from seqr.views.utils.file_utils import save_temp_file, parse_file, load_uploaded_file -from seqr.views.utils.test_utils import AuthenticationTestCase +from seqr.views.utils.file_utils import save_temp_file, parse_file, load_uploaded_file, get_temp_file_path +from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase TSV_DATA = b'Family ID Individual ID Notes\n\ "1" "NA19675" "An affected individual, additional metadata"\n\ @@ -40,6 +41,8 @@ ['0', 'NA19678', ''], ] +HASH_FILE_NAME = 'temp_upload_87f3489196cd3b81b98f3ffd3bc2653c.json.gz' + def _mock_cell(value): mock_cell = mock.MagicMock() @@ -56,10 +59,8 @@ def _mock_cell(value): MOCK_EXCEL_SHEET.iter_rows.return_value = [[_mock_cell(cell) for cell in row] for row in PARSED_DATA] -class FileUtilsTest(AuthenticationTestCase): - fixtures = ['users'] +class FileUtilsTest(object): - # TODO test persist_temp_file def test_temp_file_upload(self): url = reverse(save_temp_file) self.check_require_login(url) @@ -133,3 +134,34 @@ def test_parse_file(self, mock_load_xl): parse_file('test.{}'.format(ext), StringIO(data.decode('utf-8'))) self.assertEqual(str(cm.exception), f'Unexpected file type: test.{ext}') self.assertListEqual(parse_file('test.{}'.format(ext), StringIO(data.decode('utf-8')), allow_json=True), PARSED_DATA) + + +class LocalFileUtilsTest(AuthenticationTestCase, FileUtilsTest): + fixtures = ['users'] + + +class 
AnvilFileUtilsTest(AnvilAuthenticationTestCase, FileUtilsTest): + fixtures = ['users'] + + @mock.patch('seqr.utils.file_utils.subprocess.Popen') + def test_temp_file_upload(self, mock_subprocess): + mock_subprocess.return_value.wait.return_value = 0 + mock_subprocess.return_value.stdout.__iter__.side_effect = self._iter_gs_data + super().test_temp_file_upload() + gs_file = f'gs://seqr-scratch-temp/{HASH_FILE_NAME}' + mock_subprocess.assert_has_calls([ + mock.call(f'gsutil mv {self._temp_file_path()} {gs_file}', stdout=-1, stderr=-2, shell=True), # nosec + mock.call().wait(), + mock.call(f'gsutil cat {gs_file} | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True), # nosec + mock.call().stdout.__iter__(), + ]) + + @staticmethod + def _temp_file_path(): + return get_temp_file_path(HASH_FILE_NAME, is_local=True) + + @classmethod + def _iter_gs_data(cls): + with gzip.open(cls._temp_file_path()) as f: + for line in f: + yield line \ No newline at end of file From fe8c47f6c4efbcc16ac0a3676dcb1f981f573f86 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 11:56:41 -0400 Subject: [PATCH 185/736] style fix --- seqr/views/utils/file_utils_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/utils/file_utils_tests.py b/seqr/views/utils/file_utils_tests.py index 2ac3b8b8b6..e31a335300 100644 --- a/seqr/views/utils/file_utils_tests.py +++ b/seqr/views/utils/file_utils_tests.py @@ -164,4 +164,4 @@ def _temp_file_path(): def _iter_gs_data(cls): with gzip.open(cls._temp_file_path()) as f: for line in f: - yield line \ No newline at end of file + yield line From dfe0b6ee883db2e62e541ba6d02096bbdb889040 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 12:00:55 -0400 Subject: [PATCH 186/736] codacy fix --- seqr/views/utils/file_utils_tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/seqr/views/utils/file_utils_tests.py b/seqr/views/utils/file_utils_tests.py index e31a335300..fc828b2b43 100644 
--- a/seqr/views/utils/file_utils_tests.py +++ b/seqr/views/utils/file_utils_tests.py @@ -61,7 +61,7 @@ def _mock_cell(value): class FileUtilsTest(object): - def test_temp_file_upload(self): + def test_temp_file_upload(self, *args, **kwargs): url = reverse(save_temp_file) self.check_require_login(url) @@ -144,10 +144,10 @@ class AnvilFileUtilsTest(AnvilAuthenticationTestCase, FileUtilsTest): fixtures = ['users'] @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_temp_file_upload(self, mock_subprocess): + def test_temp_file_upload(self, mock_subprocess, *args, **kwargs): mock_subprocess.return_value.wait.return_value = 0 mock_subprocess.return_value.stdout.__iter__.side_effect = self._iter_gs_data - super().test_temp_file_upload() + super().test_temp_file_upload(*args, **kwargs) gs_file = f'gs://seqr-scratch-temp/{HASH_FILE_NAME}' mock_subprocess.assert_has_calls([ mock.call(f'gsutil mv {self._temp_file_path()} {gs_file}', stdout=-1, stderr=-2, shell=True), # nosec From d1cd420f2b0336c9bba8d2c43afc1df655aed9e1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 12:05:14 -0400 Subject: [PATCH 187/736] codacy fix --- seqr/views/utils/file_utils_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/views/utils/file_utils_tests.py b/seqr/views/utils/file_utils_tests.py index fc828b2b43..bf2333d99a 100644 --- a/seqr/views/utils/file_utils_tests.py +++ b/seqr/views/utils/file_utils_tests.py @@ -144,10 +144,10 @@ class AnvilFileUtilsTest(AnvilAuthenticationTestCase, FileUtilsTest): fixtures = ['users'] @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_temp_file_upload(self, mock_subprocess, *args, **kwargs): + def test_temp_file_upload(self, mock_subprocess, **kwargs): mock_subprocess.return_value.wait.return_value = 0 mock_subprocess.return_value.stdout.__iter__.side_effect = self._iter_gs_data - super().test_temp_file_upload(*args, **kwargs) + super().test_temp_file_upload() gs_file = 
f'gs://seqr-scratch-temp/{HASH_FILE_NAME}' mock_subprocess.assert_has_calls([ mock.call(f'gsutil mv {self._temp_file_path()} {gs_file}', stdout=-1, stderr=-2, shell=True), # nosec From 354005239077144ab7f812a5ffa7a198a34a52ce Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 12:14:38 -0400 Subject: [PATCH 188/736] codacy fix --- seqr/views/utils/file_utils_tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/seqr/views/utils/file_utils_tests.py b/seqr/views/utils/file_utils_tests.py index bf2333d99a..544183691d 100644 --- a/seqr/views/utils/file_utils_tests.py +++ b/seqr/views/utils/file_utils_tests.py @@ -144,7 +144,8 @@ class AnvilFileUtilsTest(AnvilAuthenticationTestCase, FileUtilsTest): fixtures = ['users'] @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_temp_file_upload(self, mock_subprocess, **kwargs): + def test_temp_file_upload(self, *args, **kwargs): + mock_subprocess = args[0] mock_subprocess.return_value.wait.return_value = 0 mock_subprocess.return_value.stdout.__iter__.side_effect = self._iter_gs_data super().test_temp_file_upload() From f3134779c4c18c49d583487b640a25e7f3380293 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 15:22:58 -0400 Subject: [PATCH 189/736] fix update_gencc command --- .../management/commands/utils/download_utils.py | 10 +++++----- .../management/commands/utils/update_utils.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/reference_data/management/commands/utils/download_utils.py b/reference_data/management/commands/utils/download_utils.py index 7a4bfa3ad1..838e99d224 100644 --- a/reference_data/management/commands/utils/download_utils.py +++ b/reference_data/management/commands/utils/download_utils.py @@ -18,8 +18,7 @@ def download_file(url, to_dir=tempfile.gettempdir(), verbose=True): if not (url and url.startswith(("http://", "https://"))): raise ValueError("Invalid url: {}".format(url)) local_file_path = 
os.path.join(to_dir, os.path.basename(url)) - remote_file_size = _get_remote_file_size(url) - if os.path.isfile(local_file_path) and os.path.getsize(local_file_path) == remote_file_size: + if os.path.isfile(local_file_path) and os.path.getsize(local_file_path) == _get_remote_file_size(url): logger.info("Re-using {} previously downloaded from {}".format(local_file_path, url)) return local_file_path @@ -39,9 +38,10 @@ def download_file(url, to_dir=tempfile.gettempdir(), verbose=True): def _get_remote_file_size(url): - if url.startswith("http"): + try: response = requests.head(url, timeout=10) return int(response.headers.get('Content-Length', '0')) - else: - return 0 # file size not yet implemented for FTP and other protocols + except Exception: + # file size not yet implemented for FTP and other protocols, and HEAD not supported for all http requests + return 0 diff --git a/reference_data/management/commands/utils/update_utils.py b/reference_data/management/commands/utils/update_utils.py index 2609aa0a65..4ece8c604a 100644 --- a/reference_data/management/commands/utils/update_utils.py +++ b/reference_data/management/commands/utils/update_utils.py @@ -73,15 +73,15 @@ def update_records(reference_data_handler, file_path=None): Args: file_path (str): optional local file path. If not specified, or the path doesn't exist, the table will be downloaded. 
""" - logger.info('Updating {}'.format(reference_data_handler)) - - if not file_path or not os.path.isfile(file_path): - file_path = download_file(reference_data_handler.url) - model_cls = reference_data_handler.model_cls model_name = model_cls.__name__ model_objects = getattr(model_cls, 'objects') + logger.info(f'Updating {model_name}') + + if not file_path or not os.path.isfile(file_path): + file_path = download_file(reference_data_handler.url) + models = [] skip_counter = 0 logger.info('Parsing file') From 1fcd602ba532f6ae239e33d2695dc436cf0644d9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 15:24:37 -0400 Subject: [PATCH 190/736] better timeout --- reference_data/management/commands/utils/download_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference_data/management/commands/utils/download_utils.py b/reference_data/management/commands/utils/download_utils.py index 838e99d224..0488506b24 100644 --- a/reference_data/management/commands/utils/download_utils.py +++ b/reference_data/management/commands/utils/download_utils.py @@ -39,7 +39,7 @@ def download_file(url, to_dir=tempfile.gettempdir(), verbose=True): def _get_remote_file_size(url): try: - response = requests.head(url, timeout=10) + response = requests.head(url, timeout=5) return int(response.headers.get('Content-Length', '0')) except Exception: # file size not yet implemented for FTP and other protocols, and HEAD not supported for all http requests From 9bce3e54e877a8c289c1bfad2db8f615bccc4cc1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 15:28:22 -0400 Subject: [PATCH 191/736] update tests --- reference_data/management/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference_data/management/tests/test_utils.py b/reference_data/management/tests/test_utils.py index c240ec1f02..e2908d65f6 100644 --- a/reference_data/management/tests/test_utils.py +++ b/reference_data/management/tests/test_utils.py @@ 
-28,7 +28,6 @@ def setUp(self): @responses.activate def _test_update_command(self, command_name, model_name, existing_records=1, created_records=1, skipped_records=1): # test without a file_path parameter - responses.add(responses.HEAD, self.URL, headers={"Content-Length": "1024"}) body = ''.join(self.DATA) if self.URL.endswith('gz'): body = gzip.compress(body.encode()) @@ -51,6 +50,7 @@ def _test_update_command(self, command_name, model_name, existing_records=1, cre # test with a file_path parameter self.mock_logger.reset_mock() + responses.add(responses.HEAD, self.URL, headers={"Content-Length": "1024"}) responses.remove(responses.GET, self.URL) call_command(command_name, self.tmp_file) log_calls[1] = mock.call('Deleting {} existing {} records'.format(created_records, model_name)) From 250949f7d6746d17d288f9a04a72b75c9c0cc077 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 29 May 2024 16:01:05 -0400 Subject: [PATCH 192/736] fix tests --- reference_data/management/tests/update_gencode_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reference_data/management/tests/update_gencode_tests.py b/reference_data/management/tests/update_gencode_tests.py index 42330cf21d..c75d1c85a7 100644 --- a/reference_data/management/tests/update_gencode_tests.py +++ b/reference_data/management/tests/update_gencode_tests.py @@ -150,7 +150,7 @@ def test_update_gencode_command_url_generation(self, mock_logger): responses.add(responses.GET, url_23_lift, body=self.gzipped_gtf_data, stream=True) call_command('update_gencode', '--gencode-release=23') self.assertEqual(responses.calls[0].request.url, url_23_lift) - self.assertEqual(responses.calls[2].request.url, url_23) + self.assertEqual(responses.calls[1].request.url, url_23) def _has_expected_new_genes(self, expected_release=None): gene_info = GeneInfo.objects.get(gene_id='ENSG00000223972') @@ -261,7 +261,7 @@ def test_update_gencode_command(self, mock_logger, mock_update_transcripts_logge ]) 
self.assertEqual(responses.calls[0].request.url, url_lift) - self.assertEqual(responses.calls[2].request.url, url) + self.assertEqual(responses.calls[1].request.url, url) @responses.activate @mock.patch('reference_data.management.commands.utils.update_utils.logger') From 1be53e123f5dcd1374098a47b9fc2ba462b1903b Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 29 May 2024 19:42:33 -0400 Subject: [PATCH 193/736] caid in search --- .../SNV_INDEL/annotations.ht/.README.txt.crc | Bin 12 -> 12 bytes .../annotations.ht/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../SNV_INDEL/annotations.ht/README.txt | 2 +- .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 67 -> 0 bytes .../metadata.json.gz | Bin 185 -> 0 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 65 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../SNV_INDEL/annotations.ht/metadata.json.gz | Bin 882 -> 888 bytes .../annotations.ht/rows/.metadata.json.gz.crc | Bin 20 -> 20 bytes .../annotations.ht/rows/metadata.json.gz | Bin 1236 -> 1234 bytes ...0-51a119fe-d7b8-4308-a65f-b03043bbab4c.crc | Bin 12 -> 0 bytes ...0-b419794d-b8da-42bb-8084-9ede50917538.crc | Bin 0 -> 12 bytes ...rt-0-b419794d-b8da-42bb-8084-9ede50917538} | Bin 335 -> 344 bytes .../SNV_INDEL/annotations.ht/.README.txt.crc | Bin 12 -> 12 bytes .../annotations.ht/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../SNV_INDEL/annotations.ht/README.txt | 2 +- .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 71 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 139 -> 0 bytes .../metadata.json.gz | Bin 185 -> 0 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 69 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../.index.crc | Bin 0 -> 12 bytes 
.../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 69 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 71 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 71 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 71 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../SNV_INDEL/annotations.ht/metadata.json.gz | Bin 1000 -> 1009 bytes .../annotations.ht/rows/.metadata.json.gz.crc | Bin 20 -> 24 bytes .../annotations.ht/rows/metadata.json.gz | Bin 1369 -> 1607 bytes ...0-683e0f92-5521-41a4-8803-4a9ffbb8a445.crc | Bin 0 -> 12 bytes ...0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.crc | Bin 20 -> 0 bytes ...1-9da96751-8211-4483-90fd-32199fcd4721.crc | Bin 0 -> 12 bytes ...2-87e2d4cc-0868-4352-b987-3bb7b24917df.crc | Bin 0 -> 12 bytes ...3-edcd13fe-0870-498a-b5e3-9bb75da458e5.crc | Bin 0 -> 16 bytes ...4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.crc | Bin 0 -> 12 bytes ...5-de9f4b90-41d1-4643-a915-91613e76fb2e.crc | Bin 0 -> 12 bytes ...art-0-683e0f92-5521-41a4-8803-4a9ffbb8a445 | Bin 0 -> 143 bytes ...art-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b | Bin 1502 -> 0 bytes ...art-1-9da96751-8211-4483-90fd-32199fcd4721 | Bin 0 -> 162 bytes ...art-2-87e2d4cc-0868-4352-b987-3bb7b24917df | Bin 0 -> 224 bytes ...art-3-edcd13fe-0870-498a-b5e3-9bb75da458e5 | Bin 0 -> 787 bytes ...art-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042 | Bin 0 -> 316 bytes ...art-5-de9f4b90-41d1-4643-a915-91613e76fb2e | Bin 0 -> 323 bytes hail_search/queries/ont_snv_indel.py | 5 +++++ hail_search/queries/snv_indel.py | 9 ++++++--- hail_search/test_search.py | 1 + hail_search/test_utils.py | 4 ++++ 69 files changed, 18 insertions(+), 5 deletions(-) delete mode 100644 
hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.idx/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.idx/index delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/index create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/parts/.part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/parts/.part-0-b419794d-b8da-42bb-8084-9ede50917538.crc rename hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/parts/{part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c => part-0-b419794d-b8da-42bb-8084-9ede50917538} (63%) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.idx/index create mode 100644 
hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/index delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-1-9da96751-8211-4483-90fd-32199fcd4721.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-1-9da96751-8211-4483-90fd-32199fcd4721.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-1-9da96751-8211-4483-90fd-32199fcd4721.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-1-9da96751-8211-4483-90fd-32199fcd4721.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-2-87e2d4cc-0868-4352-b987-3bb7b24917df.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-2-87e2d4cc-0868-4352-b987-3bb7b24917df.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-2-87e2d4cc-0868-4352-b987-3bb7b24917df.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-2-87e2d4cc-0868-4352-b987-3bb7b24917df.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5.idx/.index.crc create mode 100644 
hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-1-9da96751-8211-4483-90fd-32199fcd4721.crc create mode 100644 
hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-2-87e2d4cc-0868-4352-b987-3bb7b24917df.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445 delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-1-9da96751-8211-4483-90fd-32199fcd4721 create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-2-87e2d4cc-0868-4352-b987-3bb7b24917df create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5 create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042 create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/.README.txt.crc index 3ddcb80acd7282742e72925b25269a2aebd62f48..d3264ff96dc2514aff98378519de12a69733999a 100644 GIT binary patch literal 12 TcmYc;N@ieSU}9MILVh;@6UhUk literal 12 TcmYc;N@ieSU}6w4^U(tU4>$q; diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/.metadata.json.gz.crc index 57404656934245429fa5791bcc4a23f9216c3da9..630f941d514837febc72838f273067085daf042b 
100644 GIT binary patch literal 16 XcmYc;N@ieSU}A9e30*Sba_VdVA=m{Z literal 16 XcmYc;N@ieSU}7-db1a=LqHr+)B6I}F diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/README.txt index a22aabc57a..1bf854cfa4 100644 --- a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/README.txt +++ b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. Written with version 0.2.128-eead8100a1c1 - Created at 2024/02/26 15:45:13 \ No newline at end of file + Created at 2024/05/29 18:43:47 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.idx/.index.crc deleted file mode 100644 index 15ea1607963344e92843d75966a3ad18e99e04b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E6=_eT-{62k*r diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-51a119fe-d7b8-4308-a65f-b03043bbab4c.idx/.metadata.json.gz.crc deleted file mode 100644 index 7b9ae4ad7c263def2614a8404535d91bce0b3b5b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Bi|S|t6+dM z5-t?FntJQ&F3Q7rC%f}T=ZmRO)}qBI*CD8M=O7aw0|-#gc~ZnL{~@F2_Lw5bnE>OS n_&F)k9QbK=gZFf7nut6j&zVF_tvW)k^uzE490YGC&j0`bbSY6H diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/.index.crc new file mode 100644 index 
0000000000000000000000000000000000000000..78fad9791a2cb58bbe1666966c0af14f039b3b79 GIT binary patch literal 12 TcmYc;N@ieSU}D(g@^vZz6T1V& literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..ca274b3389a33a5773a7d1ce93a6b7b617301ef4 GIT binary patch literal 12 TcmYc;N@ieSU}BiCA|nX^66ymz literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..3d8c9a969ba727feb8ae1c78543e6b46e2aed60b GIT binary patch literal 65 ycmb1PU|`?}VvVi(e--#Efh-0_M)Ns`R=6-RIx@O5$N~ifVDbWuAXy+rQUCyR*$d+U literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/index/part-0-b419794d-b8da-42bb-8084-9ede50917538.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..14e2c0d67c660e738ce10971071199f6c6915139 GIT binary patch literal 185 zcmV;q07m~GiwFP!0000009B5`3c@fDME_+^3OQ6;s^%tw9uyQ6FXADs+a?$iNw#1p z{dc$CybKF7`(}D*jKv#6+dM z5-t?FntJQ&F3Q8Glihiv^TkvsYtdqq>kw4BbC8LT0R$-LJSpPJf5_;$J*Eh9CcwBS neol%s2YxrZ!FxJ3O+=oN^A1H!tvW(}>4)J9{jxpC&j0`bO+iqn literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/metadata.json.gz index 3ff949d32da0d607ac6ab84d59fcdcb80af2a28e..73c4e4017e27dfd70d9f982136989b8c00eeb4eb 100644 GIT binary patch literal 
888 zcmV-;1Bd({iwFP!000000M%CAZ`(Ey|6la90a_6|LmTMLb>b#iJ`5)a1_TDdP-mGf zMQS9S#t8Jk??s#LGdcM% z|1g`M{Q*Gpv)RcBov~SxCK`62K{7BzgCG^PdsObZsAB0ey0^y)Q1al2w?e3ADzba4 z56r$fBjX81^W2@eEK(-8e4*Nt`Li^;%2U7#kb!um%%2OXN}3aRRVA1a6HQ`nSp};~ z^O9O1lzWb;1ZOeBs;Y#L+`cINxlTKr{^wPIJd~3X+_{d6@K>fV4bHedW(%$9@u~H) zO{p=UEut~x*HPup#m&J|r}aRM1x+fANsSBwIBgCdo|c*bG+;*%wfqjGmKeg+@&-UT zR>xp?t?#qqK#bSWTs2npZaV9+G|L%1z4%)<}~eVC6_G z)Q;dBqhu7cP7hU%#iDz#PvLTfFkt((QD1>sQXF{b22A%G9J42(*ukWz4+6|K)#|&? zvyo)i?*F$ho8|uW)9+qylpGF6=(wL?5DZ-eT5_ofWjqhf&Ztz_cDOyRq&=M@EO|F~ z&47Re!lUkfF)Sv++(qGg^$vK)@y>^s6PHMvVWY53xgidDNeldCpOq3oWpo)+izqUl zzO|9+ZHm*ysf^Qkt(0-%kAJ4|Fs?5*7w6eZDC!Kyli6OtRaC(a+t{MNbu_bljPzij zV*n1)h7=1W@_S3Q>7?%%g#|Dvel*e-jv&?Ejee*np7OxE_VSzgj(v zSZgJY)gI)WTs|%~BjFFeJ5j!cB?jeYn(^&4^}Ww{xcVu>MdqFZ{R;p`A_}rxT%G$h zTjL9fsj^ZMq%}$Qmb8zf<|t|D9!;Ca^QBk0uYfi0uJG9M`ZaRZ_g}Ybl+mc7IIPXM zHq#zESbNd3k&K;r{ioU1`O&I!a}NU#f!Wy%wQ_E7H8CrWf58#*BtrzD#&4FlZ{CZI OY5gxPQs!Bx3IG7@?z!s# literal 882 zcmV-&1C9J2iwFP!000000M%AYZ`(K!{x3Rh0c{aGi#E{Bb>k-3JPaoY3IqnhP-B@b zMXDs7#t8J^JCsCKN%rC6>HI8B_LPgBnVh_z zzn{&|{sf@;+3e(m&e$wT6Ae4iAQ>2aYD7kmUYa!GV71^ED z2WDTLk?{nhdG5|!7AX^4K2z<<{7IT!!@<);`(5z)4Hd|f+m&5q(%k-oHhp!PfJYz8n7dXT7CyoOAKLZc>|yv zt79;{R`_vn-8{~(hq86y!EE3Yh{M>xC~YApR#R!1=9P`Thh(3)a?>(`HPR#qSUD04 zwIevkC>aH<(?gYGvFIM`W4K%)4A{PH)Hh(36bBx<0n_~s$Lvukb}%XGg8;KlwfgDv zY$Vy0`~Tz1X1PE8_|EH%lEdK$9rq6y1Va~rmRu@A8P7wrGb$Cf9d3_HX;0?}OWw_0 zGaw*=@Tj|=4U35|cTxCWy#wBHyz?RE#3j;Z*eGmMZiqu((gJ_kC#3{X8C}NIB8rTs z?`@=do8okFD&ursDrKDb!(VAUjO&Zd`R8mU6m^E<$!ss+GOA#QZEVrsI+|HNMtU&N zF#rc?LyCnG`JJWObkaAB!UC8SKN{(CN04goMnBXOPx)d+*0<~H#aAy*7pwlbB#Zo8 z7ZZsKuo0okSWoj3BELR2a=Rg4u5Ldqu7cePg^`;+{#*O$zl|B~ zzU%d}hkCjO4O1F=g~?9LL0C!#TIqC8hE^D_$WYSn>g~%6H#z=hY|bCfuSVhTE?4&> z)>?^UwFfyT7Y~cgNcfxYPLyw9i9xxUW_&YEeeW|Ku71jJk-6tU{{p~~=!4WXzKWPC zDeHkFK^zB4 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/metadata.json.gz index 
15fbeb2967ba6eeb41f930ce93de4067d1eb0236..2f194fb30871f084c25a823bed8a2dba99b8c42a 100644 GIT binary patch delta 1228 zcmV;-1T*{83DOCW7Juo|Jt&1jzqQapC0?o)q0=E0xv|@NN$i~MqIOaK`;GJB*hzq6 zb$cf*s!%+h@w|BQILWKWP)xjmH)WXY2%bM&ED{oSJ>LtS$=(}0dJzw|62>TsNJOOf z1XCf$B-D!D;I&-gexDG0ZYDsBXc5f(HXb2K7D{YG^_nd=aDPN0XNs=1j4~wle!?YH zlrxY_nL6sGe&HJgLm|-qu~dEXg`{{;8cHq{!GRKp$xzTlL7+^MzqW*hU|ks7u2D=G z4^T+4G^>P&aYjQF4RZHBWDCof`88oAu)J>8FEVsSi5Xd;SQbml6_h3ul`c%2fg&Of zvVe+sGnFTw%6}l@tAL?3{9rB4BZ8Q(Wh)wiRhy9&09-wrl0Nw_NKnL_xOPzaETWSh zQ~~RWs#LS9`S@ZmoFU7gwoCw$Y|1X#G^UYEJSAPTi{Eh`hAV>lGeDD$LjbWYgg?R;e zhw;U1iTGle6W$sm<-p~8A++%R4&CzzTiN~w+8v8J*jun(gp*O-g1W==2F~jw{}AL( zr5hl&0p7&-8}x32x1D^6?iOmh&~EVB!gY<-4y+wkyQp?Z-3sc8kromRAuBYz&Bq!Z z9e+Bu!}&k)sex>(aMEmf=(*-zrVBlZLr1xNV{ByRiL$wRO}s zBED^gz9}Ov0NY?|$*dBsHd*tzjH}D-YDGltvHW^GI_#c~swv+KH#EYQ$_NPj42w8Q zOD$Z`Z=i5;e0+4$H2YI3oSA;o4k9Kfu76vm`VW$z`bDIc!?cuT?4}pD+QF@aE&ZYgD>0Gx&k8GpWy4D@l}z1Tn_*K68Df z=~LTLTBTwo;s9DF@Y!?MMAe9?7R^u6fN(!E6x~AKd|yLl{sdv|EtGYvKGsQ4+kfn4 zBj%DH-gLPeK>%>LJ*ZFip{an=d(F)2dDpZ4e5bah@k7GdUPV@?*xod6yXR9 zX9ZI}3$U*gozZK#!u>wM!MUCQEv!T^^JzLk0xu=s`SLYeZhvHnd`2bNC>f!M@7;t6 zA_=1)xuUXvl)8m)5R5rT`^Q3a<#IuSQE4X_mpJewM^yNnBogAJ0{^?i)CXg2TX{w? 
zq0B=*34~t7N0d?GqiB?ScOh9=M$d0B#h&4HyLlGAHA?iz2*t2i6DFZDnW%K358u0XT%AjR$MPo_$;E~ zF;pQ?6IH3^my79_(R3Culx>M05ejQqI{7wTnQ4U)@g<>~Ig(ojr6WFUZM7LzA0&p( z-m1=yN+a|Qi-M&@T$Boj43Q9+t4?YuP3mF-CRY4YC4cdfEysb8DSkn|r8b^<^Sk-Y z_;0P`j%UX8xi=2JmNCHS5=(Q$VrwoYdU$IXrtXHa7zG}FVX-Qh6@cOGwpy*cGF_Ch zdUp%&;`(kfzOHO%Fl(-51f$(UMgAZEabj2fW`1sLO2SEuB=!)ol!VVXHq=A{{nyY6 za3qmu>wlK2=B=nZDRm`FVr<_LRle=@Fse{>Kq z8Np3wcumzo@LU`O))8>6{{L~YIN$b9{?vre?|)HH1ZEU?&j;_(T^rW9UYJ){x2Rss zmQXK-In#|nQVwjQ7eWK*Z}2>iu$ATqU~XB|QQiXbBAkrl2F5LdHvnE|_q*t}D&0V@ ziR>n@5AeDT*Jko1tXnv3W4b|T1JE@-TX?qUY{S`Nb1RxFDjE>bOsrsVn20qXT0CsW z@qaJzsSs??w++4)cU!RALfp53YxA~(lqORn-zrVLlbW(EfNkOHn|OVPu64LJBED^g zzAYmzklLVW$*dBl220nwFvPs}?0r3*^o~v^)s*Y_TM`9^$^@wU1dCcqYb9KgZ=f(7 z4En>S**`)Cndt}ZAOnozx@D^VAPJ)0Hh*e4T$Qry<%8o$aR=m|KmV1fD=9!S%w93R z>%GG~sY1eqV&qHy90_QI*PH5v!a5RRX7uykSyGpUp}i7xijjdK42U(h>iP!VKQIKpT?=gZkAoD4d8kDj-& fn{oSS9zIrnl_!B&f|vUZBQFC4fHW&G08Ie^c-#~( diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc index 6408cf00b610aae2242e225f06b01296377afec9..d2f7aab7c15a80dede87ab55910d7f3fb68b1e78 100644 GIT binary patch literal 12 TcmYc;N@ieSU}8A)<&+fw6pltq5OD(J literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..e007ed4a2918efcea1273e05f44649ddcf03cd9e GIT binary patch literal 71 zcmb1VU|r#+866oEfMUWx3|1mM6R3cZfdNeo E0KbO}>i_@% literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-683e0f92-5521-41a4-8803-4a9ffbb8a445.idx/metadata.json.gz new file mode 100644 index 
0000000000000000000000000000000000000000..172607a22a25fc75d8e6556b90df6cfebbf5d65d GIT binary patch literal 185 zcmV;q07m~GiwFP!0000009B5`3c@fDME_+^3OQ6;OU+FLJt!zDUc^JJ+a?$iNw#1p z{dc$CyetDV^LBb@jKwR4XuJbimRV5=cm(BYZCtxiHTeLt$d;u5ji%KEaxUP7>5|?E z63!I5ntJQ&F3SCQC)?9n=hajwYtdqqs}NMWagd4k0R$-LJSpOr|B%shJ4_MeOn`Au n{2Uc&4*WE`!FxJ1O+=oNFPKD3eRPCe>4)J9c<2+M&j0`bP##cr literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.index.crc deleted file mode 100644 index 0577f8ecbc8a1d2dfcdee9db2e0b7faf671e97e1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Ct@dr$xX6d40? diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/.metadata.json.gz.crc deleted file mode 100644 index da6df53be05e706231fbb7a59231bccd9f3c1f66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}8vBR5t_w5B35X diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/index deleted file mode 100644 index 7203c8999a09bee25bc428e4cc0bd5e3e8258d0e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 139 zcmYddU|>iEVvVi(e-%>KGBKF2Ffgz&uq0;`87}f?Vs>~Px2xKHYdcw-Y z7{cg$h4JvQ-9qjsnHxV|&}L$EXLP*9-?sfCkQu`0{#S^NA)&&cIZeTseUjxl$utQ? 
Upba80TSOKzFfcL#d1#gZ0OlGaY5)KL diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b.idx/metadata.json.gz deleted file mode 100644 index 5b44f36c6f25e26361bcff6cfbdc5dbf74c1c5ca..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 185 zcmb2|=3sz;(7uCQhYWby?yIV|$}Q_NtQL0U=j!Sb{}_-cB zFW9Z}{qdX}|Ln~xmOELt^G%L9y5duBx6dAa1&2y`55fI^+Iwc+PxQ-CI`B)quCI$n lyv};LA&ZrXuhX>|qB@H@Hum>T$p4l2gDIuPaoPo-ivfGmO=<*h B44VJ| literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-1-9da96751-8211-4483-90fd-32199fcd4721.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-1-9da96751-8211-4483-90fd-32199fcd4721.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..051d3e03d56a7a32825d7404a65aab17a00602f9 GIT binary patch literal 185 zcmV;q07m~GiwFP!0000009B5`3c@fDME_+^3OQ6;O3h6KJt!zDUc^IMw@ol4l5D|H z`tNSNd07T#=Isp78jCj!(Rc^4EVH5#PzU82ZCtli4fz1F$X2BQji%ECaw*`2>6+dM z5-t?_h6d}KKFY&%XS?%8=ha*&Ytdqq>kw4BbC8LT5d6+dM z5-t?_h6d}KKFY&%XS?%8=ha*&Ytdqq>kw4BbC8LT5dpltq5OD(J literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..d1d20716413f77f28276eaa93ceabc1dddfc3aab GIT binary patch literal 71 zcmb1VU|5|?E z63!I5ntJQ&F3SCQC)?9n=hajwYtdqqs}NMWagd4k0R$-LJSpOr|B%shJ4_MeOn`Au n{2Uc&4*WE`!FxJ1O+=oNFPKD3eRPCe>4)J9c<2+M&j0`bP##cr literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/.index.crc 
b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..008c1e75e64fb2cb0aa65c43ff13d7a5f6e077c3 GIT binary patch literal 12 TcmYc;N@ieSU}BhJSa28s5(Wb^ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..1ebb40475832fe624a5f861dbde3c00c6394b5eb GIT binary patch literal 12 TcmYc;N@ieSU}E6*>pltq5OD(J literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..7be7e16fec327e2b49953bfff728b294916fc7f6 GIT binary patch literal 71 zcmb1VU|5|?E z63!I5ntJQ&F3SCQC)?9n=hajwYtdqqs}NMWagd4k0R$-LJSpOr|B%shJ4_MeOn`Au n{2Uc&4*WE`!FxJ1O+=oNFPKD3eRPCe>4)J9c<2+M&j0`bP##cr literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..56d40bbb43a76c491805f0108c8779ee85f0aa07 GIT binary patch literal 12 TcmYc;N@ieSU}6X`F4O}65NQHk literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/.metadata.json.gz.crc 
b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..1ebb40475832fe624a5f861dbde3c00c6394b5eb GIT binary patch literal 12 TcmYc;N@ieSU}E6*>pltq5OD(J literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/index/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..c3b74582d5e2baed8dbe5b4b9b04f0a22aa9f754 GIT binary patch literal 71 zcmb1VU|5|?E z63!I5ntJQ&F3SCQC)?9n=hajwYtdqqs}NMWagd4k0R$-LJSpOr|B%shJ4_MeOn`Au n{2Uc&4*WE`!FxJ1O+=oNFPKD3eRPCe>4)J9c<2+M&j0`bP##cr literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/metadata.json.gz index 53b8a5905738ebf9411078a6a014759caf343fe3..10d50587b5ef702cdacb46903f3dbaadf75d33c3 100644 GIT binary patch literal 1009 zcmVCuI|qX`p=TN;}XBR4qbRLdY`CY3ePp zb8VN=i~8U1aS}U@Gd6TTPfU>We*7H!vG+3C@B+vaD9xo9rJqhu#%IZBOL@^LM@JvV zACAXo?*V9bc6@Y1kJ<4k87bI+0>MBN1qG?7*`jl&MHfRKF}>MWfR#3L~o$d~@5p%%bc< z3c_TD8vG-)QIre6cFh!Y4Qr_kWCUFV8hB;u2>54LqwTOs+PVEf=cnj^*p&ll?DruA z$13a=lej$2xY?%@rRe^?$?}j;tw9+?xvxJ2h}slaJ43D4E!74TsT3|Y3h7@P#21o1B;b0Ieb=?^VY(ZeD64*T@JkZh+Dq$4OmKO{>2aP?|Sy0E|h z`m&mCPtM*szo>}HPfp3e{q$XehctWR-t`%6XO0NL^G)%ofneYl&|l*K~u;F0Xr-&RyV?5oW28g6TO!2`wJ`~HZLcrqx1m4Ibt#K|wFNOI7k86YWZdBD z%aUAuUAUq5>=B>NtU#)*ADMgg}}oS*FH)65#)8fk((9yvbgy?S@^g)8iO=l{nsuB=Zp~kubLS5i<-g3bG4jy zP!I2*_&CNFQU-;g(FPh`Qhxu$_mf}J&oJ(m=_lF0&nQE-wQr~HxrE0mD3%+daWm^- zx$9xwtt)p(rjyHa_X0WKvzW=Uk^+R$qx5Ceyol>hK}-8mUfrEfoy)ocRzJ4=3)}f0 zP%D4DJ+5}fpo$_<)KpP3O_*TmOw-yDCv*Py$BF-Wuo;!CEc-w&mshsYeXopbBuHk8 f{a4sSHcAmvuvf2(um8JHhaUa|KZV9hrwjl9$&K@c literal 1000 zcmVVEZ 
z&b3`eFY154$4Q(x&e+iXJPAS0`|)$^$L1y4@*Kz$D9xpalJkqR(PbR%D9<}(bn;>J z;dFHQ9)PBorza=$l$}O#q+kmQ1OrVJ7A5Oi;{|q4D0doN=A8KoBg+EX=5|?;2HAxa zgvk^&_(y1?C>QP8HB-_DxL__ z{eG-A!G_CWaO`;!#J>p4x!e+@KePlzkH`2s9FoUevb~OxjvxoUj+Ol3>Q$CxZh!yv zWjWcMUA}XEK@pXmU66tM#k&{}Y5K;!+h(+-X~Bggl<~AZbc`a}9CrR{V zUmYb7K)83xo;34?_WXP0JNE`zt8vNupc30i-C^ahj=3flT0t}XNj^!L0~H?H!w9c3 zetoYiAP^i%O$lbU*C6wrjQRjc3503r=Pnt0aeta zzxE*7;<+-|)50SW9*(ldFh0V2T!F(NlXIy4Lv;0iQ*eX8hkFzIOu4D< zieJ*$a{4fx_mPYrrmOppfez_zffP5?h9uavy}F*=`~EoBh0G7-4#bFDJ&c!uaf_=j z3Uc#x?uOp82YfoU0x5TvciY+d6g!@g#ogj&{CC^!c-DPR$TXC%7*CLeZz;Hw zZo8mY2;47m+a_r~yu7YAa9>h&oJ(m*-o;5pHYTvYu`@Ya|w@?S1dC`<7U>w za<{{}J68_x6MPCYS(H+MFgi+JqWTqFeF|FG*YWb<^TfHVN?_FkyM0wV|08PUkGIEF z&KOiuB!`L~Dt7S`ESzalS;Az_|M7fn^C($bo`GJ5R@ub#y;81`3Yp0dU*Q00lpuIu WFIg8~{bNvt9{&TyOstEM3;+OroAz)3 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc index 66c0bcabb7a829b28f367b0afe6724bb03153a4b..a2527f863d102e26317dc9ece3435d5adc8d0bc4 100644 GIT binary patch literal 24 gcmYc;N@ieSU}AW)XwLDsM>%Q*OxkwNe$2oM0Btl0X#fBK literal 20 ccmYc;N@ieSU}8A2Jm%55*FL!ijZzi@07y6pG5`Po diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz index 666bcdacb8cb38eac9a4f8268edd46b05f7825c3..df24a92a0e0e52e2d7ef30342b575d73b38a0568 100644 GIT binary patch literal 1607 zcmV-N2DtejiwFP!000000Nt2vZ=*O6$G;1&ZgsF19+E)brP;JwUE7V?syf}OLI#_} zyI?pVRl616{l+}nK-x|3`es+v3ftqEzsF<$AkPXz5m6S(Im6_U;Oo0jHwp0#MOEDI zv6xXGg^MbP3TXm2?f8~3 z;tElB+lS1TT1ndyqzE%S8t0JHva1Qv>U49GMs6vn)?1p~t2BcQ1ujEd_yb>Y91_G- z-WSjiW{ph}1i;hxHgWgr14=1klCDR{Z>eZzsaqCD2)mF9UM7@n%Upsx%5rIsez=(* zr)04{4P#X93B)6WNk%Ez!nV2fs{1Mn$T`N+gw5yc^(VonA5sR2C7Y_8&c!4KA@y?* zyFvLbhK-^=)JwPZQoSaE&6Bhjdst$ZsBIkLn;fNCV{m99$}*Dj@*LZUZ0|)hhg}lt zur7Ykjmxqg3>7N&SQaGGvai>7>x*8})w}i0t<~?m+~=DV4#cn<@o={&<5iPzlWr9^ 
z=cl*Q3@C<896lvG!VLHG-SWCW8bCgx$i4jYqA3;Gpil|j_6lOnd)#CPv4veGkI0wI zmZ$FK%jR)}4#vF_;Rwm=;ofnsOU=&(}stP9F7lpesg@7`(pekk+XieSqYnxq%=Y~aS@e_ z2B$uOvp2B9B+OC20Bck6(rfFlQ1dv6hmA<@Aq!mv6mStpQz(L_--&m@!$NY;ngs|=I2a=;*wT|;sm0M{6|r0fdRl5107 zmNd{3YXnwftQtqvE2J8L>P1eyhN&K=CdtMjZ(_BfJ?w&C8^bLDSvw?ZrOGaktEN zCu_V6$kJS_4OseI8HBqU#@#jHYMfL)OPYB5ARuY)qwQjSq8s)}H>4{5j)qt;IRTJ- z2Pcw|$2)K64=|Wbr{-)p?e8qd<*~^gGEsOnc7>!dDB;ZwWcKG_aF3s z{P;&{&U=9dLHTX^Qy;7%!2_CR90rfoi#^mNd>2b!qMa>_EZ*^Bw@~9C$VlEQL5#)K zJtxN~iuZ$of3J`e0P+Y}m_(~mH%7KOaip2{_d*dfXnS(An zvX?^P&J&d8rrM0d( z7&)`~RM%`n2W!hVHD?lF%`|k!34CnL4ZX28Gr&eEX^&S>d5NsgPTLmzImIPpY!eC%QE&y5C_T%gfaYOqL2%wn7;Ge;iTfhkHzJ zDrvC8r0rYt7`&%YkJWCOPCw+Eq`&0nJ(N3^t=n?MTQFE1Df~q@@1fhY=Z0nJ&M@ZL zkoa@y-*f+(r=L>KZyuEADUVeY{{muju_^Ky F006Pa;aK`R*O*N#!l-Y zak93H=%D=fJNmvXp!q$d}xlr_Tfl zlXUHY6lI9-+%yqX(uBdx1(W@w%q?Ae(-G%rcV9_9xP09|^*>{g3yQsJStMK%>`RWA z2slk8m**dQ!n4?S@d-T&6LBfeQoka=C_XPYA z!VDo!*061UnYA5d1~tdnP?&#TOuuP1gNQPS*pMl@=|V4J7|}pNuyZ6g32YP%pkIuJ zYt&OsW@qfh7E){qwN4_ul!%K;V4orqf^gNIT^k{`tC&jIC9Mz3=7+d(-qx+9%*87v z!qiB6F}<5!T19v7rpw!7d-7^4=Nw*$_B2xNZjn%3lPZxWi{-`a*3f`vm?zOL-4J%< zf4>_o?9~wJ5y$@3-`OW-l^iLE0Xr?_+ z{1N_9MGm9yMCzj?Hr8TV53e=FtZH`hoWw*}0chT?8*9~*?xKp-zghU_mp9|lWurTT zRo#{rOzj>jvfn?{`P2SE_f`LKYF7PfKG8NM~Hv@0w zYDKqlSinsiq}9MgTREwb?G>;U5q4s257i6}n`s?HE5pfw_70v|D6>hn2V#0qCBh7P zwcurQ>rHGKTiL>nj;l7T_OPl8RYr9+P?^1{VaiyjA=SaBEcmh(0PVo5&;RD&+kg67@G;K*ZuD3_3+Q~@X9Jur zp9(h`X}UmTFs1`B9dvmeE+$)=W4%7hTQzRdtaq{`N(&~{!CJAzMoG)vb=&Wza#s*! 
zwGBhUt()29blf`{j+-Ud2{tssn#dU7@d&{fLMt`7q(4DmaB|WgwAKC<66dNP)Pu|j zifzqI{evW^x{F#lTvT)U)s&M+op~_-_3K}`xS9n5g!u*ZUAIyl)FI_U0pPj#euqAU z%ddbz$M-+1c)#pMo4H9C3L@(`5MxPkvYAe^?4j$Ep-Dtb!@z+G|D)d*byTgGtZ06d z_5*@26kS8te6F2xf1I%H2Fm+YeWeiJ+U#~47Bj!NL)<&-cf^t!-L`J89KuF^q@aD zSsfgQz3^Z-An-Im$G9IH4iC>(`7ee)7Kp9M7(P0&(tmLPaa^jzir*LEp=e?78fmLRHBVq-E literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042.crc new file mode 100644 index 0000000000000000000000000000000000000000..db657b76a3025d9c4bb74f561c62c8aab9d0b67f GIT binary patch literal 12 TcmYc;N@ieSU}9*#VP_8j63+t@ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/.part-5-de9f4b90-41d1-4643-a915-91613e76fb2e.crc new file mode 100644 index 0000000000000000000000000000000000000000..31e95f2c0edae40c37d1b66e43f6be0b88d3d210 GIT binary patch literal 12 TcmYc;N@ieSU}7j~`kj*A|@zQfE2G$P^Y6bB&1aNxR^g8i*pCJ@`&)y)Z literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-0-879a2dd7-365b-4f1f-86eb-46a4d3c0809b deleted file mode 100644 index 417105c39ba59d60adc1d3377417c93ca522a02e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1502 zcmV<41tI#$1pokF3jhEpwJ-f(UkcqV0E$dd5kzpPsSZ;JNihIIs(JQnPt~eo7pK+{ z2xb;7u{H*;h%T5`k4cFQ?&Th3C7%NeuHS2SQyP(gJpol!fL1#=b%YqSO{N0@@c;|~ zYOmt;7yU1#T!-8&{i9?gASgF`2FY>7FEQn5S4FO* zdD=UQqD4{vV9LYgiA z|5!Ncg#Ev^wXriNtrcCj70=E$7x~h2v*e{}qGeB3x{$_ot6jIXeHYTWJca`qpm-fS zI2XfQq0AEz%J3Y+ZkT&{a@OrZqt{28i0SpxgAZyOD!xrpmRP`qd7B<2Yt(Mym#FgI 
zNi;1*9w5owX}ME#r$_}}KTu86Joy+~@liF84iNYc%3a%3MWseuM|nSu4k-Nxb$0S3 z&)!pxmMW=sZki*GOnE<_Qj35+%Q_s-%gYDVdI2 z)nz%NCpwnjE4_|-ckM=d+3q%yS@ChBsCkwcyt=d$xNEEK&PSZpIhv_?q9ua8H;VGt z9Z~hRBaC!grDZGne zn6zw#SUmV^o-iN3*3fzQU(uR+tY*~Az@C6r;xIM&vZ4u+@kUdChyD(W z6;0Zb5bK-7p0&a%>EXk+c4!h?x2}Kyb8t>)#*@ha>D( z2V?ja>vnEPHRIqo=DBcz9!@vIbF7oe;IdtofRL&O<8)>+>=mBLy!n-RVy}hpjddo< zcDtRt0vf{@Z9+^Ri?@dr`(a&tke=F zMc~`CG|@YhSc&3AgvZc-5gMdP`d_(_b+K=zL{ahoA)46Kf*_^gVAXU$g7r|ux*lt{ zF|Me@JP0F;ixsVS6FG^J+vKXq1K6baHaS00wJ-Rb3j7yU@{{Zf^ycG9p1mZUUPra- zZL8^&$<0wM$&_4h_Km6=>}N-NFL_T+n!b1%RHYAt6#o zDKjJ@qA*Dc>97J3nWCBp5+LP31EHWXh!}*-h>ValC8=2qjROG{jGD5-g03UXBE=8> zLa`V_aUp?Ap% ziC$2nw#ok5jFoVJynAlFsK3|vx#&5iAz569NLHOc@_6sgC&Sy6ON-?O+Ut)hH zVXYkl!_-BLt7a{*{WMts#AM)%I(z1fEfbfsqoJ9(p}C={5d$ws0}y~61au7p0L-B) AY5)KL literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-2-87e2d4cc-0868-4352-b987-3bb7b24917df b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-2-87e2d4cc-0868-4352-b987-3bb7b24917df new file mode 100644 index 0000000000000000000000000000000000000000..387b68ff3c67ed3b9303435e0cdde2319115e645 GIT binary patch literal 224 zcmX@jz`(%7$iSenRsU}S6T@3p1{N+xrrWG6$r(k4kDjnHF@`WYGcu^MG4i|m1-ly< z8Cw__Sjq$0AqFO9W+ui4dZt!+dPZg@x<;nv7LIoA{B}$X5>gE6Z06?XCMFh!jNFF0 zmO$DO@(T@*zn1R+Y+*SEz&tQF@-vP*GcyQ2|-G0&`R+aGm55%^JEn#GibUML*l{ey$Ck1JEJ>iOD$J literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5 b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-3-edcd13fe-0870-498a-b5e3-9bb75da458e5 new file mode 100644 index 0000000000000000000000000000000000000000..92d121f5e9bb5ba9ef1323ebd44be8ff291a8a07 GIT binary patch literal 787 zcmV+u1MK|%0ssIK1^@skwJ-f(5(R}90QRVrND#5M0l;vG0Z`78o2;{`YfB>UaI{f<6F^SSf`dnf-*HGNt5}$xu^&T@63?O=4&s&&Q=EQ+@z{ z0DJ&Adl~ok<(uD|xh;b>y?rj{Tc^FfZS8W5<23DQjr&wl#ZFWvcy(@>Cyk|owX1dK 
zahE{~#&jV3Kz=jUMio!%cIRpCs!}kf0{336@jazrOd~O*NfAMs5C*20KpnMK5#}(y z8>9b&V}%I|7&~Dq>?5MXvxEO?%t*Fk0ltTp6wlvss#&OF9vH`{Vj&e1RV=F+hG9Yl zJ8)dBl|=_diVBLsp~cRrq$+Wo^QJdf!T~_KyScl&yFW%MX2QF!^RVVsu*N$N5&`v7 z5`N(38i6)ujkDKr5+V`RPl=Z|Z}W2ASaW>i8F!sFZX0Kh!0tA!H;&CO$MekrX1N+m zFY}At_TODSrJ=u*|3pg{OZHSWdxKI2!xnHr&3r%k(hMz1I(F~Ru#cc)nfBC|pVeDt zi-<)*`&hbuPubT$Q}LfN;sPuBvZiI4C=>FFt}iOEq!+CU^28~-(T!MXYB83HTyK6Y{VF-pbm3*^Ow-0erVKiIYF|S@ktn&Ks^*{RzDaK za_V=Xd{FCWL&Et~?6_h=WFuaTK=6DvH!9SShzunem`=~<#>IC7lHg;p;e%zVB$>KG zV#Nba4(M}0lKD_2AY^1GOV{~X0LWg>lw)V1Ha&gFYsET$ioQr|ioQ(yw`oWHSt8GK zMFv8s@u-uAxd}2-MheOeAOjI0ZUYiPlt=+dGD0UX#tDouLT6?sn#o)^34ok&J`bl) zqgYk^!Y{dDq#ts1#OYneiM;jk&DVGZ71?I>_{#hDz-2`T<*m7k;>BznMjjvx1Gpt1Rc(jq;ZKMeo? R0000004TLD{U87V004nbZ~FiM literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042 b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-4-7eae1a6f-2232-49ff-9d00-da1b8f1aa042 new file mode 100644 index 0000000000000000000000000000000000000000..7141b12f7392b1df94bacdc3ef0a1f42e95e5761 GIT binary patch literal 316 zcmV-C0mJ?$0RRB60RR9fwJ-f(uK>*m08$S-H&AbLu=47-1<0EPn;NtgPxGk)76|EE z;NyW_111?*o+e|_v}__*ZaINy=F;`w!kLnEc@rs9oiFD%`-2RY{4^jT71fi;NOF7( zWWabNq!FML(^KJK#p?D>CdAKWgjCVp52FdCBfvxBV{s65S_DGuj)NH8{VgP1gKardn?pMGtnZvBqWN9VQ#!FC!a%w6sp$A2L zD5|Ap5deCL5_(xz;}sMpVt5H1KoTkgjR6;6VdwyqP!F`51jG=k8!+z{lmQI@00000 O001bpFa00@0RRAt<%#M5 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/parts/part-5-de9f4b90-41d1-4643-a915-91613e76fb2e new file mode 100644 index 0000000000000000000000000000000000000000..3c2f9b82011ef2b45067c5e5189da402791722b3 GIT binary patch literal 323 zcmV-J0lfY$0RRB*0RR9fwJ-f(?f?Y|0P;>cH&AbLNUedOvPCw{UBLB1!?nb?r8uxK z#BTQpFWUtP5u*B|$zHi-k|_zjEN`Qim1GGEyGfY>{q@#M-TuZ^%|KF(AY@Np0Mgk( 
zVWb;}QaJL%$ACm9U`8hUNITgGRuVd2EDJdzq$N`#F}=b3cgL0us_3BTX~P6X!?Qr^ zYS$feY`K8UGOFSCnZ4g?Iq_1Tw_f}8Kz7x!v%$#Vs`ZIdXj*)H;`hf-L!z%k@IW9| z?IeA$iN9*qQc(1DoNH-@sVDlnP3VPJ2!;TWyBai9x5o40;^EPYV3Zz}5)Hp7ktWJO z#ufmLFad=08DzmNfC#Pu;F?|DkQfvUh=2t_F@S@*pDe&Q9TfKl2yhTeu9^kbfcdwe V3=IGP0000004TLD{U87V007LWlZXHS literal 0 HcmV?d00001 diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py index dc99ad8e18..2bc263dd5f 100644 --- a/hail_search/queries/ont_snv_indel.py +++ b/hail_search/queries/ont_snv_indel.py @@ -10,6 +10,11 @@ class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery): CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS + BASE_ANNOTATION_FIELDS = { + k: v for k, v in SnvIndelHailTableQuery.BASE_ANNOTATION_FIELDS.items() + if k not in SnvIndelHailTableQuery.SNV_INDEL_ANNOTATION_FIELDS + } + def _get_loaded_filter_ht(self, *args, **kwargs): return None diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index a95890e038..93e985f741 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -54,10 +54,13 @@ class SnvIndelHailTableQuery(MitoHailTableQuery): } PATHOGENICITY_FIELD_MAP = {} ANNOTATION_OVERRIDE_FIELDS = [SPLICE_AI_FIELD, SCREEN_KEY] - + SNV_INDEL_ANNOTATION_FIELDS = { + 'CAID': lambda r: r.CAID, + } BASE_ANNOTATION_FIELDS = { - k: v for k, v in MitoHailTableQuery.BASE_ANNOTATION_FIELDS.items() - if k not in MitoHailTableQuery.MITO_ANNOTATION_FIELDS + **SNV_INDEL_ANNOTATION_FIELDS, + **{k: v for k, v in MitoHailTableQuery.BASE_ANNOTATION_FIELDS.items() + if k not in MitoHailTableQuery.MITO_ANNOTATION_FIELDS}, } ENUM_ANNOTATION_FIELDS = { **MitoHailTableQuery.ENUM_ANNOTATION_FIELDS, diff --git a/hail_search/test_search.py b/hail_search/test_search.py index cc1586df32..905ebe3d4d 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -119,6 +119,7 @@ 'mainTranscriptId': 'ENST00000420911', 'selectedMainTranscriptId': None, '_sort': [7143270172], + 'CAID': 
'CA4540310', } FAMILY_3_VARIANT = deepcopy(VARIANT3) diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index 7da21ce4af..1a4731450c 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -142,6 +142,7 @@ 'mainTranscriptId': None, 'selectedMainTranscriptId': None, '_sort': [1000010439], + 'CAID': 'CA16717152', } VARIANT2 = { 'variantId': '1-38724419-T-G', @@ -241,6 +242,7 @@ 'mainTranscriptId': 'ENST00000376585', 'selectedMainTranscriptId': None, '_sort': [1038724419], + 'CAID': None, } VARIANT3 = { 'variantId': '1-91502721-G-A', @@ -309,6 +311,7 @@ 'mainTranscriptId': 'ENST00000428239', 'selectedMainTranscriptId': None, '_sort': [1091502721], + 'CAID': 'CA10960369', } VARIANT4 = { 'variantId': '1-91511686-T-G', @@ -374,6 +377,7 @@ 'mainTranscriptId': 'ENST00000428239', 'selectedMainTranscriptId': None, '_sort': [1091511686], + 'CAID': 'CA341062623', } VARIANT_LOOKUP_VARIANT = { From 2e9bb4e6a8c74776ebc2de6e70508e6af8b12ab5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 11:15:47 -0400 Subject: [PATCH 194/736] allow transfer families in hail backend --- .../transfer_families_to_different_project.py | 24 ++++++++---- ...fer_families_to_different_project_tests.py | 37 ++++++++++--------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/seqr/management/commands/transfer_families_to_different_project.py b/seqr/management/commands/transfer_families_to_different_project.py index c2ff1e1b42..8c7187af98 100644 --- a/seqr/management/commands/transfer_families_to_different_project.py +++ b/seqr/management/commands/transfer_families_to_different_project.py @@ -1,17 +1,21 @@ from django.core.management.base import BaseCommand -from seqr.models import Project, Family, VariantTag, VariantTagType +from seqr.models import Project, Family, VariantTag, VariantTagType, Sample from seqr.utils.search.utils import backend_specific_call import logging logger = logging.getLogger(__name__) -def 
_validate_no_search_families(families): - search_families = families.filter(individual__sample__is_active=True).distinct().values_list('family_id', flat=True) - if search_families: - logger.info(f'Unable to transfer the following families with loaded search data: {", ".join(search_families)}') - return families.exclude(individual__sample__is_active=True) +def _disable_search(families): + search_samples = Sample.objects.filter(is_active=True, individual__family__in=families) + if search_samples: + updated_families = search_samples.values_list("individual__family__family_id", flat=True).distinct() + family_summary = ", ".join(sorted(updated_families)) + num_updated = search_samples.update(is_active=False) + logger.info( + f'Disabled search for {num_updated} samples in the following {len(updated_families)} families: {family_summary}' + ) class Command(BaseCommand): @@ -25,9 +29,13 @@ def handle(self, *args, **options): to_project = Project.objects.get(guid=options['to_project']) family_ids = options['family_ids'] families = Family.objects.filter(project=from_project, family_id__in=family_ids) - logger.info('Found {} out of {} families. No match for: {}.'.format(len(families), len(set(family_ids)), ', '.join(set(family_ids) - set([f.family_id for f in families])))) + num_found = len(families) - families = backend_specific_call(lambda f: f, _validate_no_search_families)(families) + num_expected = len(set(family_ids)) + missing_id_message = '' if num_found == num_expected else f' No match for: {", ".join(set(family_ids) - set([f.family_id for f in families]))}.' 
+ logger.info(f'Found {num_found} out of {num_expected} families.{missing_id_message}') + + backend_specific_call(lambda f: None, _disable_search)(families) for variant_tag_type in VariantTagType.objects.filter(project=from_project): variant_tags = VariantTag.objects.filter(saved_variants__family__in=families, variant_tag_type=variant_tag_type) diff --git a/seqr/management/tests/transfer_families_to_different_project_tests.py b/seqr/management/tests/transfer_families_to_different_project_tests.py index 9e13cae56f..2e02c30ae1 100644 --- a/seqr/management/tests/transfer_families_to_different_project_tests.py +++ b/seqr/management/tests/transfer_families_to_different_project_tests.py @@ -2,21 +2,19 @@ from django.test import TestCase import mock -from seqr.models import Family, VariantTagType, VariantTag +from seqr.models import Family, VariantTagType, VariantTag, Sample class TransferFamiliesTest(TestCase): fixtures = ['users', '1kg_project'] - @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') - @mock.patch('seqr.management.commands.transfer_families_to_different_project.logger.info') - def test_es_command(self, mock_loger): + def _test_command(self, mock_loger, additional_family, logs): call_command( - 'transfer_families_to_different_project', '--from-project=R0001_1kg', '--to-project=R0003_test', '12', '2', + 'transfer_families_to_different_project', '--from-project=R0001_1kg', '--to-project=R0003_test', additional_family, '2', ) mock_loger.assert_has_calls([ - mock.call('Found 1 out of 2 families. 
No match for: 12.'), + *logs, mock.call('Updating "Excluded" tags'), mock.call('Updating families'), mock.call('Done.'), @@ -24,6 +22,7 @@ def test_es_command(self, mock_loger): family = Family.objects.get(family_id='2') self.assertEqual(family.project.guid, 'R0003_test') + self.assertEqual(family.individual_set.count(), 3) old_tag_type = VariantTagType.objects.get(name='Excluded', project__guid='R0001_1kg') new_tag_type = VariantTagType.objects.get(name='Excluded', project__guid='R0003_test') @@ -35,22 +34,26 @@ def test_es_command(self, mock_loger): self.assertEqual(len(new_tags), 1) self.assertEqual(new_tags[0].saved_variants.first().family, family) - @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', '') + return family + + @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @mock.patch('seqr.management.commands.transfer_families_to_different_project.logger.info') - def test_hail_backend_command(self, mock_loger): - call_command( - 'transfer_families_to_different_project', '--from-project=R0001_1kg', '--to-project=R0003_test', '4', '2', + def test_es_command(self, mock_loger): + self._test_command( + mock_loger, additional_family='12', logs=[mock.call('Found 1 out of 2 families. No match for: 12.')] ) - mock_loger.assert_has_calls([ - mock.call('Found 2 out of 2 families. 
No match for: .'), - mock.call('Unable to transfer the following families with loaded search data: 2'), - mock.call('Updating families'), - mock.call('Done.'), + @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', '') + @mock.patch('seqr.management.commands.transfer_families_to_different_project.logger.info') + def test_hail_backend_command(self, mock_loger): + searchable_family = self._test_command(mock_loger, additional_family='4', logs=[ + mock.call('Found 2 out of 2 families.'), + mock.call('Disabled search for 7 samples in the following 1 families: 2'), ]) - no_transfer_family = Family.objects.get(family_id='2') - self.assertEqual(no_transfer_family.project.guid, 'R0001_1kg') + samples = Sample.objects.filter(individual__family=searchable_family) + self.assertEqual(samples.count(), 7) + self.assertEqual(samples.filter(is_active=True).count(), 0) family = Family.objects.get(family_id='4') self.assertEqual(family.project.guid, 'R0003_test') From adb84fbadc43e8c524105ec552fcdf819b61186d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 11:55:03 -0400 Subject: [PATCH 195/736] shared helper function --- .../check_for_new_samples_from_pipeline.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 84e09a3504..6dc5c05406 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -132,23 +132,27 @@ def handle(self, *args, **options): # Reload saved variant JSON updated_variants_by_id = update_projects_saved_variant_json( updated_project_families, user_email=USER_EMAIL, dataset_type=dataset_type) + + data_type = f'{dataset_type}_{sample_type}' if dataset_type == Sample.DATASET_TYPE_SV_CALLS else dataset_type self._reload_shared_variant_annotations( - 
updated_variants_by_id, updated_families, dataset_type, sample_type, genome_version) + data_type, genome_version, updated_variants_by_id, exclude_families=updated_families) logger.info('DONE') @staticmethod - def _reload_shared_variant_annotations(updated_variants_by_id, updated_families, dataset_type, sample_type, genome_version): - data_type = dataset_type - is_sv = dataset_type == Sample.DATASET_TYPE_SV_CALLS + def _reload_shared_variant_annotations(data_type, genome_version, updated_variants_by_id=None, exclude_families=None): + dataset_type = data_type.split('_')[0] + is_sv = dataset_type.startswith(Sample.DATASET_TYPE_SV_CALLS) + dataset_type = data_type.split('_')[0] if is_sv else data_type db_genome_version = genome_version.replace('GRCh', '') updated_annotation_samples = Sample.objects.filter( is_active=True, dataset_type=dataset_type, individual__family__project__genome_version=db_genome_version, - ).exclude(individual__family__guid__in=updated_families) + ) + if exclude_families: + updated_annotation_samples = updated_annotation_samples.exclude(individual__family__guid__in=exclude_families) if is_sv: - updated_annotation_samples = updated_annotation_samples.filter(sample_type=sample_type) - data_type = f'{dataset_type}_{sample_type}' + updated_annotation_samples = updated_annotation_samples.filter(sample_type=data_type.split('_')[1]) variant_models = SavedVariant.objects.filter( family_id__in=updated_annotation_samples.values_list('individual__family', flat=True).distinct(), @@ -167,7 +171,7 @@ def _reload_shared_variant_annotations(updated_variants_by_id, updated_families, updated_variants_by_id = { variant_id: {k: v for k, v in variant.items() if k not in {'familyGuids', 'genotypes', 'genotypeFilters'}} - for variant_id, variant in updated_variants_by_id.items() + for variant_id, variant in (updated_variants_by_id or {}).items() } fetch_variant_ids = sorted(set(variants_by_id.keys()) - set(updated_variants_by_id.keys())) if fetch_variant_ids: @@ -186,3 
+190,6 @@ def _reload_shared_variant_annotations(updated_variants_by_id, updated_families, SavedVariant.objects.bulk_update(updated_variant_models, ['saved_variant_json'], batch_size=10000) logger.info(f'Updated {len(updated_variant_models)} saved variants') + + +reload_shared_variant_annotations = Command._reload_shared_variant_annotations From 739ad68fde4944c16f4062964a9288c258a98ae7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 12:15:44 -0400 Subject: [PATCH 196/736] nw command --- .../check_for_new_samples_from_pipeline.py | 7 +++---- .../reload_saved_variant_annotations.py | 20 +++++++++++++++++++ ...eck_for_new_samples_from_pipeline_tests.py | 2 +- seqr/utils/search/hail_search_utils.py | 4 ++++ 4 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 seqr/management/commands/reload_saved_variant_annotations.py diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 6dc5c05406..c530eef6b6 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -12,7 +12,7 @@ from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.search.add_data_utils import notify_search_data_loaded from seqr.utils.search.utils import parse_valid_variant_id -from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup +from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type from seqr.views.utils.dataset_utils import match_and_update_search_samples from seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ saved_variants_dataset_type_filter @@ -133,9 +133,8 @@ def handle(self, *args, **options): updated_variants_by_id = update_projects_saved_variant_json( updated_project_families, user_email=USER_EMAIL, dataset_type=dataset_type) - data_type = 
f'{dataset_type}_{sample_type}' if dataset_type == Sample.DATASET_TYPE_SV_CALLS else dataset_type self._reload_shared_variant_annotations( - data_type, genome_version, updated_variants_by_id, exclude_families=updated_families) + search_data_type(dataset_type, sample_type), genome_version, updated_variants_by_id, exclude_families=updated_families) logger.info('DONE') @@ -167,7 +166,7 @@ def _reload_shared_variant_annotations(data_type, genome_version, updated_varian for v in variant_models: variants_by_id[v.variant_id].append(v) - logger.info(f'Reloading shared annotations for {len(variant_models)} saved variants ({len(variants_by_id)} unique)') + logger.info(f'Reloading shared annotations for {len(variant_models)} {data_type} {genome_version} saved variants ({len(variants_by_id)} unique)') updated_variants_by_id = { variant_id: {k: v for k, v in variant.items() if k not in {'familyGuids', 'genotypes', 'genotypeFilters'}} diff --git a/seqr/management/commands/reload_saved_variant_annotations.py b/seqr/management/commands/reload_saved_variant_annotations.py new file mode 100644 index 0000000000..290f6a23ed --- /dev/null +++ b/seqr/management/commands/reload_saved_variant_annotations.py @@ -0,0 +1,20 @@ +from django.core.management.base import BaseCommand +from reference_data.models import GENOME_VERSION_LOOKUP +from seqr.models import Sample +from seqr.management.commands.check_for_new_samples_from_pipeline import reload_shared_variant_annotations +from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type + +DATA_TYPE_CHOICES = { + search_data_type(dt, st) for dt in Sample.DATASET_TYPE_LOOKUP for st in [Sample.SAMPLE_TYPE_WGS, Sample.SAMPLE_TYPE_WES] +} + + +class Command(BaseCommand): + help = 'Reload shared variant annotations for all saved variants' + + def add_arguments(self, parser): + parser.add_argument('data_type', choices=soretd(DATA_TYPE_CONFIGS)) + parser.add_argument('genome_version', 
choices=sorted(GENOME_VERSION_LOOKUP.values())) + + def handle(self, *args, **options): + reload_shared_variant_annotations(options['data_type'], options['genome_version']) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 61d88c8bb2..d64c361ad7 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -220,7 +220,7 @@ def test_command(self, mock_email, mock_airtable_utils): {'individual_guid': 'I000018_na21234', 'family_guid': 'F000014_14', 'project_guid': 'R0004_non_analyst_project', 'affected': 'A', 'sample_id': 'NA21234'}, ]}}, ], reload_annotations_logs=[ - 'Reloading shared annotations for 3 saved variants (3 unique)', 'Fetched 1 additional variants', 'Fetched 1 additional variants', 'Updated 2 saved variants', + 'Reloading shared annotations for 3 SNV_INDEL GRCh38 saved variants (3 unique)', 'Fetched 1 additional variants', 'Fetched 1 additional variants', 'Updated 2 saved variants', ]) old_data_sample_guid = 'S000143_na20885' diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 945d0d02a2..80409e7f50 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -129,6 +129,10 @@ def _format_search_body(samples, genome_version, num_results, search): return search_body +def search_data_type(dataset_type, sample_type): + return f'{dataset_type}_{sample_type}' if dataset_type == Sample.DATASET_TYPE_SV_CALLS else dataset_type + + def _get_sample_data(samples, inheritance_filter=None, inheritance_mode=None, **kwargs): sample_values = dict( individual_guid=F('individual__guid'), From 598d609214f2e5804b71c842bdc76f09832d23bb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 12:16:11 -0400 Subject: [PATCH 197/736] use utility function --- 
seqr/utils/search/hail_search_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 80409e7f50..8a5002e078 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -154,7 +154,7 @@ def _get_sample_data(samples, inheritance_filter=None, inheritance_mode=None, ** dataset_type = s.pop('dataset_type') sample_type = s.pop('sample_type') s['sample_id'] = s.pop('individual__individual_id') - data_type_key = f'{dataset_type}_{sample_type}' if dataset_type == Sample.DATASET_TYPE_SV_CALLS else dataset_type + data_type_key = search_data_type(dataset_type, sample_type) sample_data_by_data_type[data_type_key].append(s) return sample_data_by_data_type From 7af17ea831fe99a97bbdb92a71bfac68356c25f3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 15:53:06 -0400 Subject: [PATCH 198/736] update tests --- .../reload_saved_variant_annotations.py | 4 +- .../reload_saved_variant_annotations_tests.py | 77 +++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 seqr/management/tests/reload_saved_variant_annotations_tests.py diff --git a/seqr/management/commands/reload_saved_variant_annotations.py b/seqr/management/commands/reload_saved_variant_annotations.py index 290f6a23ed..f0e6a346fe 100644 --- a/seqr/management/commands/reload_saved_variant_annotations.py +++ b/seqr/management/commands/reload_saved_variant_annotations.py @@ -2,7 +2,7 @@ from reference_data.models import GENOME_VERSION_LOOKUP from seqr.models import Sample from seqr.management.commands.check_for_new_samples_from_pipeline import reload_shared_variant_annotations -from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type +from seqr.utils.search.hail_search_utils import search_data_type DATA_TYPE_CHOICES = { search_data_type(dt, st) for dt in Sample.DATASET_TYPE_LOOKUP for st in 
[Sample.SAMPLE_TYPE_WGS, Sample.SAMPLE_TYPE_WES] @@ -13,7 +13,7 @@ class Command(BaseCommand): help = 'Reload shared variant annotations for all saved variants' def add_arguments(self, parser): - parser.add_argument('data_type', choices=soretd(DATA_TYPE_CONFIGS)) + parser.add_argument('data_type', choices=sorted(DATA_TYPE_CHOICES)) parser.add_argument('genome_version', choices=sorted(GENOME_VERSION_LOOKUP.values())) def handle(self, *args, **options): diff --git a/seqr/management/tests/reload_saved_variant_annotations_tests.py b/seqr/management/tests/reload_saved_variant_annotations_tests.py new file mode 100644 index 0000000000..ff6edffb4c --- /dev/null +++ b/seqr/management/tests/reload_saved_variant_annotations_tests.py @@ -0,0 +1,77 @@ +from datetime import datetime +from django.core.management import call_command +from django.core.management.base import CommandError +import json +import mock +import responses + +from seqr.views.utils.test_utils import AnvilAuthenticationTestCase +from seqr.models import Project, Family, Individual, Sample, SavedVariant + +MOCK_HAIL_HOST = 'http://test-hail-host' + + +@mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HAIL_HOST) +class ReloadVariantAnnotationsTest(AnvilAuthenticationTestCase): + fixtures = ['users', '1kg_project'] + + @mock.patch('seqr.management.commands.check_for_new_samples_from_pipeline.logger') + @responses.activate + def test_command(self, mock_logger): + responses.add(responses.POST, f'{MOCK_HAIL_HOST}:5000/multi_lookup', status=200, json={ + 'results': [ + {'variantId': '1-46859832-G-A', 'updated_new_field': 'updated_value', 'rsid': 'rs123'}, + {'variantId': '1-248367227-TC-T', 'updated_field': 'updated_value'}, + ], + }) + + # Test errors + with self.assertRaises(CommandError) as ce: + call_command('reload_saved_variant_annotations') + self.assertEqual(str(ce.exception), 'Error: the following arguments are required: data_type, genome_version') + + with 
self.assertRaises(CommandError) as ce: + call_command('reload_saved_variant_annotations', 'SV', 'GRCh37') + self.assertEqual(str(ce.exception), "Error: argument data_type: invalid choice: 'SV' (choose from 'MITO', 'ONT_SNV_INDEL', 'SNV_INDEL', 'SV_WES', 'SV_WGS')") + + # Test success + call_command('reload_saved_variant_annotations', 'SNV_INDEL', 'GRCh37') + + mock_logger.info.assert_has_calls([mock.call(log) for log in [ + 'Reloading shared annotations for 3 SNV_INDEL GRCh37 saved variants (3 unique)', + 'Fetched 2 additional variants', + 'Updated 2 saved variants', + ]]) + + self.assertEqual(len(responses.calls), 1) + multi_lookup_request = responses.calls[0].request + self.assertEqual(multi_lookup_request.url, f'{MOCK_HAIL_HOST}:5000/multi_lookup') + self.assertEqual(multi_lookup_request.headers.get('From'), 'manage_command') + self.assertDictEqual(json.loads(multi_lookup_request.body), { + 'genome_version': 'GRCh37', + 'data_type': 'SNV_INDEL', + 'variant_ids': [['1', 248367227, 'TC', 'T'], ['1', 46859832, 'G', 'A'], ['21', 3343353, 'GAGA', 'G']], + }) + + annotation_updated_json_1 = SavedVariant.objects.get(guid='SV0000002_1248367227_r0390_100').saved_variant_json + self.assertEqual(len(annotation_updated_json_1), 18) + self.assertListEqual(annotation_updated_json_1['familyGuids'], ['F000001_1']) + self.assertEqual(annotation_updated_json_1['updated_field'], 'updated_value') + + annotation_updated_json_2 = SavedVariant.objects.get(guid='SV0059956_11560662_f019313_1').saved_variant_json + self.assertEqual(len(annotation_updated_json_2), 18) + self.assertEqual(annotation_updated_json_2['updated_new_field'], 'updated_value') + self.assertEqual(annotation_updated_json_2['rsid'], 'rs123') + self.assertEqual(annotation_updated_json_2['mainTranscriptId'], 'ENST00000505820') + self.assertEqual(len(annotation_updated_json_2['genotypes']), 3) + + # Test SVs + Sample.objects.filter(guid='S000147_na21234').update(individual_id=20) + 
call_command('reload_saved_variant_annotations', 'SV_WGS', 'GRCh37') + + self.assertEqual(len(responses.calls), 2) + self.assertDictEqual(json.loads(responses.calls[1].request.body), { + 'genome_version': 'GRCh37', + 'data_type': 'SV_WGS', + 'variant_ids': ['prefix_19107_DEL'], + }) From 787c594279f022bb7b8f92129c975dc40dcd361f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 16:04:09 -0400 Subject: [PATCH 199/736] remove unused imports --- .../management/tests/reload_saved_variant_annotations_tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/seqr/management/tests/reload_saved_variant_annotations_tests.py b/seqr/management/tests/reload_saved_variant_annotations_tests.py index ff6edffb4c..72ad752e34 100644 --- a/seqr/management/tests/reload_saved_variant_annotations_tests.py +++ b/seqr/management/tests/reload_saved_variant_annotations_tests.py @@ -1,4 +1,3 @@ -from datetime import datetime from django.core.management import call_command from django.core.management.base import CommandError import json @@ -6,7 +5,7 @@ import responses from seqr.views.utils.test_utils import AnvilAuthenticationTestCase -from seqr.models import Project, Family, Individual, Sample, SavedVariant +from seqr.models import Sample, SavedVariant MOCK_HAIL_HOST = 'http://test-hail-host' From 831cb1ced15193b2bccd9aede9b638e87f2d98c7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 17:43:30 -0400 Subject: [PATCH 200/736] add functional tag --- ...ariantfunctionaldata_functional_data_tag.py | 18 ++++++++++++++++++ seqr/models.py | 4 ++++ 2 files changed, 22 insertions(+) create mode 100644 seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py diff --git a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py new file mode 100644 index 0000000000..027652323a --- /dev/null +++ 
b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.23 on 2024-05-30 21:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0066_family_post_discovery_mondo_id'), + ] + + operations = [ + migrations.AlterField( + model_name='variantfunctionaldata', + name='functional_data_tag', + field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional 
Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": "Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"description": "Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 945cf17894..d04c4a0258 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -935,6 +935,10 @@ class VariantFunctionalData(ModelWithGUID): 'description': 'Variant has been shown to be disease-causing (in literature, functional studies, etc.) 
but one or more individuals in this family with the variant do not present with clinical features of the disorder.', 'color': '#E985DC', })), + ('Partial Phenotype Contribution', json.dumps({ + 'description': 'Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.', + 'color': '#1F42D9', + })), )), ) From 0c98322dc475af5de0ca70fc23bab714f5a1cb3a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 17:52:40 -0400 Subject: [PATCH 201/736] add metadata title --- .../0067_alter_variantfunctionaldata_functional_data_tag.py | 4 ++-- seqr/models.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py index 027652323a..e8f2e6358a 100644 --- a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py +++ b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.23 on 2024-05-30 21:41 +# Generated by Django 3.2.23 on 2024-05-30 21:51 from django.db import migrations, models @@ -13,6 +13,6 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='variantfunctionaldata', name='functional_data_tag', - field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene 
product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": "Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). 
Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"description": "Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]), + field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived 
cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": "Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) 
but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"metadata_title": "HPO Terms", "description": "Variant is believed to be part of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]), ), ] diff --git a/seqr/models.py b/seqr/models.py index d04c4a0258..1cd1f46b0e 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -936,7 +936,8 @@ class VariantFunctionalData(ModelWithGUID): 'color': '#E985DC', })), ('Partial Phenotype Contribution', json.dumps({ - 'description': 'Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.', + 'metadata_title': 'HPO Terms', + 'description': 'Variant is believed to be part of the solve, explaining only some of the phenotypes.', 'color': '#1F42D9', })), )), From 323e8cd4c0d0020ea90cdc4cc88fbebc7468fdfb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 11:30:41 -0400 Subject: [PATCH 202/736] delect HPO terms for phenotype functional tag --- seqr/views/utils/orm_to_json_utils.py | 3 +- ui/redux/selectors.js | 10 +++++++ .../panel/view-fields/TagFieldView.jsx | 28 ++++++++++++++++--- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index daaa38d98a..67a943fc8f 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -441,8 +441,7 @@ def _format_functional_tags(tags): display_data = VariantFunctionalData.FUNCTIONAL_DATA_TAG_LOOKUP[name] tag.update({ 'name': name, - 'metadataTitle': display_data.get('metadata_title', 'Notes'), - 'color': display_data['color'], + **{k: display_data[k] for k in ['metadataTitle', 'color']}, }) return tags diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index db076a4937..cf9a0a989d 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -425,6 +425,16 @@ 
export const getUserOptions = createSelector( ), ) +export const getHpoTermOptionsByFamily = createSelector( + getIndividualsByFamily, + individualsByFamily => Object.entries(individualsByFamily).reduce((acc, [familyGuid, individuals]) => ({ + ...acc, + [familyGuid]: individuals.reduce((fAcc, { features }) => ([...fAcc, ...(features || []).map( + ({ id, label }) => ({ value: id, text: label, description: id }), + )]), []), + }), {}), +) + export const getRnaSeqSignificantJunctionData = createSelector( getGenesById, getIndividualsByGuid, diff --git a/ui/shared/components/panel/view-fields/TagFieldView.jsx b/ui/shared/components/panel/view-fields/TagFieldView.jsx index 9b40e354a5..ab492b93e1 100644 --- a/ui/shared/components/panel/view-fields/TagFieldView.jsx +++ b/ui/shared/components/panel/view-fields/TagFieldView.jsx @@ -1,10 +1,12 @@ import React from 'react' +import { connect } from 'react-redux' import { NavLink } from 'react-router-dom' import PropTypes from 'prop-types' import styled from 'styled-components' import { Popup, Form } from 'semantic-ui-react' import { Field } from 'react-final-form' +import { getHpoTermOptionsByFamily } from 'redux/selectors' import { HorizontalSpacer } from '../../Spacers' import { ColoredLabel, ColoredOutlineLabel } from '../../StyledComponents' import { LargeMultiselect, Multiselect } from '../../form/Inputs' @@ -32,6 +34,15 @@ MultiselectField.propTypes = { input: PropTypes.object, } +const mapHpoDropdownStateToProps = (state, ownProps) => ({ + options: getHpoTermOptionsByFamily(state)[ownProps.metadataId], +}) + +const LIST_FORMAT_PROPS = { + format: val => (val || '').split(', ').filter(v => v), + parse: val => (val || []).join(', '), +} + const METADATA_FIELD_PROPS = { [NOTES_METADATA_TITLE]: { width: 16, maxLength: 50, placeholder: 'Enter up to 50 characters' }, Reason: { width: 16, maxLength: 50, placeholder: 'Brief reason for excluding. 
Enter up to 50 characters' }, @@ -43,12 +54,16 @@ const METADATA_FIELD_PROPS = { addValueOptions: true, options: ['Sanger', 'Segregation', 'SV', 'Splicing'].map(value => ({ value })), placeholder: 'Select test types or add your own', - format: val => (val || '').split(', ').filter(v => v), - parse: val => (val || []).join(', '), + ...LIST_FORMAT_PROPS, + }, + 'HPO Terms': { + width: 16, + component: connect(mapHpoDropdownStateToProps)(MultiselectField), + ...LIST_FORMAT_PROPS, }, } -const MetadataField = React.memo(({ value, name, error }) => { +const MetadataField = React.memo(({ value, name, error, metadataId }) => { if (!value.metadataTitle) { return null } @@ -62,6 +77,7 @@ const MetadataField = React.memo(({ value, name, error }) => { component={Form.Input} label={value.metadataTitle} error={error} + metadataId={metadataId} {...fieldProps} /> @@ -72,6 +88,7 @@ MetadataField.propTypes = { value: PropTypes.object, name: PropTypes.string, error: PropTypes.bool, + metadataId: PropTypes.string, } export const TagFieldDisplay = React.memo(({ @@ -129,6 +146,7 @@ class TagFieldView extends React.PureComponent { noEditTagTypes: PropTypes.arrayOf(PropTypes.string), linkTagType: PropTypes.string, tagLinkUrl: PropTypes.string, + modalId: PropTypes.string, } getSimplifiedProps() { @@ -199,7 +217,7 @@ class TagFieldView extends React.PureComponent { render() { const { - simplifiedValue, field, tagOptions, popup, tagAnnotation, validate, displayMetadata, ...props + simplifiedValue, field, tagOptions, popup, tagAnnotation, validate, displayMetadata, modalId, ...props } = this.props const additionalFields = tagOptions.some(({ metadataTitle }) => metadataTitle) ? [{ @@ -208,6 +226,7 @@ class TagFieldView extends React.PureComponent { isArrayField: true, validate: val => ((!val || !val.metadataTitle || val.metadataTitle === NOTES_METADATA_TITLE || val.metadata) ? 
undefined : 'Required'), component: MetadataField, + metadataId: modalId, }] : [] return ( @@ -216,6 +235,7 @@ class TagFieldView extends React.PureComponent { additionalEditFields={additionalFields} modalStyle={MODAL_STYLE} fieldDisplay={this.fieldDisplay} + modalId={modalId} {...props} {...(simplifiedValue ? this.getSimplifiedProps() : this.getMappedProps())} /> From 97499ea95fe862c1715337393838610873ec4700 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 11:52:08 -0400 Subject: [PATCH 203/736] add partial contribution to gregor report --- seqr/views/apis/report_api.py | 4 ++-- seqr/views/utils/anvil_metadata_utils.py | 7 ++++++- ui/pages/Report/components/VariantMetadata.jsx | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 7316cd48b0..da586f4cdb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -241,7 +241,7 @@ def _add_row(row, family_id, row_type): } GENETIC_FINDINGS_TABLE_COLUMNS = { 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp', - 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', + 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained', 'genetic_findings_id', 'participant_id', 'experiment_id', 'zygosity', 'allele_balance_or_heteroplasmy_percentage', 'variant_inheritance', 'linked_variant', 'additional_family_members_with_variant', 'method_of_discovery', 'gene_disease_validity', @@ -379,7 +379,7 @@ def _add_row(row, family_id, row_type): elif row_type == DISCOVERY_ROW_TYPE and row: for variant in row: genetic_findings_rows.append({ - **variant, 'phenotype_contribution': 'Full', 'variant_type': 'SNV/INDEL', + **variant, 'variant_type': 'SNV/INDEL', }) parse_anvil_metadata( diff --git 
a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 8a8d0cdc95..b16a929dcb 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -328,7 +328,10 @@ def _get_parsed_saved_discovery_variants_by_family( project_saved_variants = SavedVariant.objects.filter( varianttag__variant_tag_type__in=tag_types, family__id__in=families, **(variant_filter or {}), - ).order_by('created_date').distinct().annotate(tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True)) + ).order_by('created_date').distinct().annotate( + tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True), + partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')), + ) variants = [] gene_ids = set() @@ -348,6 +351,8 @@ def _get_parsed_saved_discovery_variants_by_family( 'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(), 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', + 'phenotype_contribution': 'Partial' if variant.partial_hpo_terms else 'Full', + 'partial_contribution_explained': variant.partial_hpo_terms[0].replace(', ', '|') if variant.partial_hpo_terms else None, **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']}, diff --git a/ui/pages/Report/components/VariantMetadata.jsx b/ui/pages/Report/components/VariantMetadata.jsx index b09db6a2ab..7a2e7e03f6 100644 --- a/ui/pages/Report/components/VariantMetadata.jsx +++ b/ui/pages/Report/components/VariantMetadata.jsx @@ -19,6 +19,7 @@ const COLUMNS = [ { name: 'condition_id' }, { 
name: 'condition_inheritance' }, { name: 'phenotype_contribution' }, + { name: 'partial_contribution_explained' }, { name: 'additional_family_members_with_variant' }, { name: 'method_of_discovery' }, { name: 'Submitted to MME', format: ({ MME }) => (MME ? 'Yes' : 'No') }, From 64a11ff2435aa49ad33def5318f22be3fc2fbacd Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 11:57:23 -0400 Subject: [PATCH 204/736] support uncertain contribution --- seqr/views/utils/anvil_metadata_utils.py | 10 ++++++++-- ui/redux/selectors.js | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index b16a929dcb..3b4638b9ea 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -343,6 +343,12 @@ def _get_parsed_saved_discovery_variants_by_family( gene_id = main_transcript.get('geneId') gene_ids.add(gene_id) + partial_hpo_terms = variant.partial_hpo_terms[0] if variant.partial_hpo_terms else '' + phenotype_contribution = 'Partial' if partial_hpo_terms else 'Full' + if partial_hpo_terms == 'Uncertain': + phenotype_contribution = 'Uncertain' + partial_hpo_terms = '' + variants.append({ 'chrom': chrom, 'pos': pos, @@ -351,8 +357,8 @@ def _get_parsed_saved_discovery_variants_by_family( 'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(), 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', - 'phenotype_contribution': 'Partial' if variant.partial_hpo_terms else 'Full', - 'partial_contribution_explained': variant.partial_hpo_terms[0].replace(', ', '|') if variant.partial_hpo_terms else None, + 'phenotype_contribution': phenotype_contribution, + 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), **{k: _get_transcript_field(k, config, main_transcript) for k, config in 
TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']}, diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index cf9a0a989d..fbc57692b0 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -431,7 +431,7 @@ export const getHpoTermOptionsByFamily = createSelector( ...acc, [familyGuid]: individuals.reduce((fAcc, { features }) => ([...fAcc, ...(features || []).map( ({ id, label }) => ({ value: id, text: label, description: id }), - )]), []), + )]), [{ value: 'Uncertain' }]), }), {}), ) From 5fa24f2d74ae3a541087f22c085dce7a58728c59 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 31 May 2024 11:59:36 -0400 Subject: [PATCH 205/736] add to annotations ui component --- ui/shared/components/panel/variants/Annotations.jsx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 3af864824a..eb1ed275e6 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -438,7 +438,7 @@ const svSizeDisplay = (size) => { const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => { const { rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport, - endChrom, + endChrom, CAID } = variant const mainTranscript = getVariantMainTranscript(variant) @@ -613,6 +613,13 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
)} + {CAID && ( + + )} {variant.liftedOverGenomeVersion === GENOME_VERSION_37 && ( variant.liftedOverPos ? (
From ce901d0cc8681caf8bafc895e5737c585088eb90 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 12:02:19 -0400 Subject: [PATCH 206/736] include phenotype contribution in individual metadata --- ui/pages/Report/components/VariantMetadata.jsx | 2 -- ui/shared/utils/constants.js | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/pages/Report/components/VariantMetadata.jsx b/ui/pages/Report/components/VariantMetadata.jsx index 7a2e7e03f6..ee7fe71e10 100644 --- a/ui/pages/Report/components/VariantMetadata.jsx +++ b/ui/pages/Report/components/VariantMetadata.jsx @@ -18,8 +18,6 @@ const COLUMNS = [ { name: 'known_condition_name' }, { name: 'condition_id' }, { name: 'condition_inheritance' }, - { name: 'phenotype_contribution' }, - { name: 'partial_contribution_explained' }, { name: 'additional_family_members_with_variant' }, { name: 'method_of_discovery' }, { name: 'Submitted to MME', format: ({ MME }) => (MME ? 'Yes' : 'No') }, diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 4d107cc89a..78a34356c5 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1867,6 +1867,8 @@ export const VARIANT_METADATA_COLUMNS = [ { name: 'sv_type', fieldName: 'svType', format: ({ svType }) => SVTYPE_LOOKUP[svType] || svType }, { name: 'variant_inheritance' }, { name: 'gene_known_for_phenotype' }, + { name: 'phenotype_contribution' }, + { name: 'partial_contribution_explained' }, { name: 'notes' }, ] From 5fb0015cbb471e64a733d8e6b7cd2df61842763c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 12:32:57 -0400 Subject: [PATCH 207/736] add tests --- seqr/fixtures/report_variants.json | 26 +++++++++++++++++++++++ seqr/views/apis/report_api_tests.py | 9 +++++--- seqr/views/apis/summary_data_api_tests.py | 15 +++++++++---- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/seqr/fixtures/report_variants.json b/seqr/fixtures/report_variants.json index bae02ef233..cb096d1e70 100644 
--- a/seqr/fixtures/report_variants.json +++ b/seqr/fixtures/report_variants.json @@ -139,5 +139,31 @@ "variant_tag_type": 4, "search_hash": null } +}, +{ + "model": "seqr.variantfunctionaldata", + "pk": 29, + "fields": { + "guid": "VFD0000029_1248367227_r0390_10", + "created_date": "2018-05-24T15:34:01.353Z", + "created_by": null, + "last_modified_date": "2024-05-24T15:34:01.365Z", + "saved_variants": [6], + "functional_data_tag": "Partial Phenotype Contribution", + "metadata": "HP:0000501, HP:0000365" + } +}, +{ + "model": "seqr.variantfunctionaldata", + "pk": 30, + "fields": { + "guid": "VFD0000030_1248367227_r0390_10", + "created_date": "2018-05-24T15:34:01.353Z", + "created_by": null, + "last_modified_date": "2024-05-24T15:34:01.365Z", + "saved_variants": [2], + "functional_data_tag": "Partial Phenotype Contribution", + "metadata": "Uncertain" + } } ] \ No newline at end of file diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 6e3ef0ed8d..c99b6b84c1 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -508,6 +508,7 @@ 'method_of_discovery': 'SR-ES', 'notes': None, 'phenotype_contribution': 'Full', + 'partial_contribution_explained': '', 'phenotype_description': None, 'pmid_id': None, 'seqr_chosen_consequence': None, @@ -612,12 +613,12 @@ ], [ 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', '', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', - 'MONDO:0044970', '', 'Full', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', + 'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', ], [ 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', '', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_249045487', '', 'Candidate', 'IRIDA 
syndrome', 'MONDO:0008788', 'Autosomal dominant', - 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', + 'Partial', 'HP:0000501|HP:0000365', '', 'SR-ES', '', '', '', '', '', '', '', ], [ 'Broad_NA20889_1_249045487', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '249045487', 'A', 'G', '', 'OR4G11P', '', '', '', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_248367227', '', 'Candidate', @@ -1219,7 +1220,7 @@ def test_variant_metadata(self): 'genetic_findings_id': 'HG00731_1_248367227', 'known_condition_name': 'mitochondrial disease', 'participant_id': 'HG00731', - 'phenotype_contribution': 'Full', + 'phenotype_contribution': 'Uncertain', 'phenotype_description': 'microcephaly; seizures', 'pos': 248367227, 'projectGuid': 'R0001_1kg', @@ -1290,6 +1291,8 @@ def test_variant_metadata(self): 'hgvsp': 'c.1586-17C>G', 'participant_id': 'NA20889', 'pos': 248367227, + 'partial_contribution_explained': 'HP:0000501|HP:0000365', + 'phenotype_contribution': 'Partial', 'projectGuid': 'R0003_test', 'internal_project_id': 'Test Reprocessed Project', 'ref': 'TC', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 2c8a9a5354..96a6f5d580 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -107,6 +107,10 @@ 'notes-2': None, 'tags-1': ['Tier 1 - Novel gene and phenotype'], 'tags-2': ['Tier 1 - Novel gene and phenotype'], + 'phenotype_contribution-1': 'Partial', + 'phenotype_contribution-2': 'Full', + 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', + 'partial_contribution_explained-2': '', 'condition_id': 'OMIM:616126', 'condition_inheritance': 'Autosomal recessive', 'known_condition_name': 'Immunodeficiency 38', @@ -153,6 +157,8 @@ 'chrom-1': '1', 'gene_known_for_phenotype-1': 'Candidate', 'tags-1': ['Tier 1 - Novel gene and phenotype'], + 'phenotype_contribution-1': 'Full', + 'partial_contribution_explained-1': '', 'pos-1': 248367227, 'end-1': None, 'ref-1': 
'TC', @@ -352,7 +358,8 @@ def test_saved_variants_page(self): response = self.client.get('{}?gene=ENSG00000135953'.format(all_tag_url)) self.assertEqual(response.status_code, 200) expected_variant_guids.add('SV0000002_1248367227_r0390_100') - self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), expected_variant_guids) + report_variants = {'SV0027168_191912632_r0384_rare', 'SV0027167_191912633_r0384_rare', 'SV0027166_191912634_r0384_rare'} + self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), {*report_variants, *expected_variant_guids}) multi_tag_url = reverse(saved_variants_page, args=['Review;Tier 1 - Novel gene and phenotype']) response = self.client.get('{}?gene=ENSG00000135953'.format(multi_tag_url)) @@ -369,7 +376,7 @@ def test_saved_variants_page(self): self.assertEqual(response.status_code, 200) self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), { 'SV0000001_2103343353_r0390_100', 'SV0000002_1248367227_r0390_100', 'SV0000007_prefix_19107_DEL_r00', - 'SV0000006_1248367227_r0003_tes', + 'SV0000006_1248367227_r0003_tes', *report_variants, }) multi_discovery_tag_url = reverse(saved_variants_page, args=['CMG Discovery Tags;Review']) @@ -707,7 +714,7 @@ def test_sample_metadata_export(self, mock_google_authenticated): # Tests for AnVIL access disabled class LocalSummaryDataAPITest(AuthenticationTestCase, SummaryDataAPITest): - fixtures = ['users', '1kg_project', 'reference_data'] + fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] NUM_MANAGER_SUBMISSIONS = 4 ADDITIONAL_SAMPLES = ['NA21234', 'NA21987'] @@ -723,7 +730,7 @@ def assert_has_expected_calls(self, users, skip_group_call_idxs=None): # Test for permissions from AnVIL only class AnvilSummaryDataAPITest(AnvilAuthenticationTestCase, SummaryDataAPITest): - fixtures = ['users', 'social_auth', '1kg_project', 'reference_data'] + fixtures = ['users', 'social_auth', '1kg_project', 'reference_data', 'report_variants'] 
NUM_MANAGER_SUBMISSIONS = 4 ADDITIONAL_SAMPLES = [] From d9c53dabb4c23807fad51f40e9e660821f2abda1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 12:36:32 -0400 Subject: [PATCH 208/736] bump changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64626f8078..d52b04bd41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Add "Partial Phenotype Contribution" functional tag (REQUIRES DB MIGRATION) ## 5/24/24 * Adds external_data to Family model (REQUIRES DB MIGRATION) From e03b6bbe57d5284cf228c927c62c191d8fbe3685 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 12:41:08 -0400 Subject: [PATCH 209/736] updat eui tests --- .../components/IndividualMetadata.test.js | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ui/pages/SummaryData/components/IndividualMetadata.test.js b/ui/pages/SummaryData/components/IndividualMetadata.test.js index 625bdf05e8..f2e4a34c7c 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.test.js +++ b/ui/pages/SummaryData/components/IndividualMetadata.test.js @@ -62,6 +62,10 @@ const DATA = [ participant_id: 'NA20889', individual_guid: 'I000017_na20889', proband_relationship: 'Self', + 'phenotype_contribution-1': 'Partial', + 'phenotype_contribution-2': 'Full', + 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', + 'partial_contribution_explained-2': '', }, ] @@ -77,17 +81,18 @@ test('IndividualMetadata render and export', () => { 'filter_flags', 'consanguinity', 'family_history', 'genetic_findings_id-1', 'variant_reference_assembly-1', 'chrom-1', 'pos-1', 'ref-1', 'alt-1', 'gene_of_interest-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1', 'hgvsc-1', 'hgvsp-1', 'zygosity-1', 'sv_name-1', 'sv_type-1', 'variant_inheritance-1', 'gene_known_for_phenotype-1', - 'notes-1', 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2', + 
'phenotype_contribution-1', 'partial_contribution_explained-1', 'notes-1', 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2', 'ref-2', 'alt-2', 'gene_of_interest-2', 'gene_id-2', 'seqr_chosen_consequence-2', 'transcript-2', 'hgvsc-2', 'hgvsp-2', - 'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', 'notes-2']) + 'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', + 'phenotype_contribution-2', 'partial_contribution_explained-2', 'notes-2']) expect(exportConfig.processRow(DATA[0])).toEqual([ 'Test Reprocessed Project', 'R0003_test', '12', 'F000012_12', 'NA20889', 'I000017_na20889', null, '', '', '', '', 'Self', 'Female', 'Ashkenazi Jewish', undefined, undefined, null, 'Affected', 'HP:0011675 (Arrhythmia)|HP:0001509 ()', '', null, undefined, 'Waiting for data', 'Tier 1', 'Y', 'WES', '2017-02-05', '', undefined, 'Yes', 'NA20889_1_248367227', undefined, '1', 248367227, 'TC', 'T', 'OR4G11P', 'ENSG00000240361', 'intron_variant', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', undefined, undefined, - 'unknown', 'Candidate', undefined, 'NA20889_1_249045487', undefined, '12', '49045487', undefined, + 'unknown', 'Candidate', 'Partial', 'HP:0000501|HP:0000365', undefined, 'NA20889_1_249045487', undefined, '12', '49045487', undefined, undefined, undefined, undefined, undefined, undefined, undefined, undefined, 'Heterozygous', 'DEL:chr12:49045487-49045898', 'Deletion', - 'unknown', 'Candidate', undefined]) + 'unknown', 'Candidate', 'Full', '', undefined]) }) From fdcdb9b4812b38530279022e03e5347349535c08 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 13:21:06 -0400 Subject: [PATCH 210/736] clean up extra participant fields --- seqr/views/apis/report_api.py | 18 ++++++++++++------ seqr/views/utils/anvil_metadata_utils.py | 23 +++++++++++++++-------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git 
a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index da586f4cdb..86db286bfb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -408,10 +408,11 @@ def _add_row(row, family_id, row_type): for participant in participant_rows: phenotype_rows += _parse_participant_phenotype_rows(participant) - if not participant[PARTICIPANT_ID_FIELD]: + airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD) + if not airtable_participant_id: continue - airtable_metadata = airtable_metadata_by_participant.get(participant[PARTICIPANT_ID_FIELD]) or {} + airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {} data_types = grouped_data_type_individuals[participant['participant_id']] _parse_participant_airtable_rows( participant, airtable_metadata, data_types, experiment_ids_by_participant, @@ -467,11 +468,11 @@ def _get_individual_data_types(projects): def _parse_participant_phenotype_rows(participant): base_phenotype_row = {'participant_id': participant['participant_id'], 'presence': 'Present', 'ontology': 'HPO'} present_rows = [ - dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['features'] or [] + dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant.pop('features') or [] ] base_phenotype_row['presence'] = 'Absent' return present_rows + [ - dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['absent_features'] or [] + dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant.pop('absent_features') or [] ] @@ -492,8 +493,10 @@ def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} ) - if participant['analyte_id'] and not has_analyte: - analyte_rows.append(participant) + # TODO constant + analyte_row = {k: participant.pop(k) for k in ['analyte_id', 
'analyte_type', 'primary_biosample', 'tissue_affected_status']} + if analyte_row['analyte_id'] and not has_analyte: + analyte_rows.append(analyte_row) def _get_gregor_airtable_data(participants, user): @@ -647,12 +650,15 @@ def _populate_gregor_files(file_data): files.append((file_name, list(table_config.keys()), data)) + expected_columns = {k for d in data for k, v in d.items() if v} # TODO extra_columns = expected_columns.difference(table_config.keys()) if extra_columns: col_summary = ', '.join(sorted(extra_columns)) warnings.insert( 0, f'The following columns are computed for the "{file_name}" table but are missing from the data model: {col_summary}', ) + errors.append(warnings[0]) # TODO + continue invalid_data_type_columns = { col: config['data_type'] for col, config in table_config.items() if config.get('data_type') and config['data_type'] not in DATA_TYPE_VALIDATORS diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 3b4638b9ea..0a31d270a5 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -136,7 +136,8 @@ def _get_family_metadata(family_filter, family_fields, include_metadata, include family_data_by_id = {} for f in family_data: family_id = f.pop('id') - solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(f['analysisStatus'], Individual.UNSOLVED) + analysis_status = f['analysisStatus'] if include_metadata else f.pop('analysisStatus') + solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(analysis_status, Individual.UNSOLVED) f.update({ 'solve_status': Individual.SOLVE_STATUS_LOOKUP[solve_status], **{k: v['format'](f) for k, v in (family_fields or {}).items()}, @@ -230,7 +231,7 @@ def parse_anvil_metadata( subject_row = _get_subject_row( individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, - format_id, + format_id, include_metadata, ) if individual.id in matchmaker_individuals: subject_row['MME'] = 
matchmaker_individuals[individual.id] if mme_values else 'Yes' @@ -400,7 +401,7 @@ def _get_transcript_field(field, config, transcript): return value -def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id): +def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id, include_metadata): paternal_ids = individual_ids_map.get(individual.father_id, ('', '')) maternal_ids = individual_ids_map.get(individual.mother_id, ('', '')) subject_row = { @@ -414,19 +415,25 @@ def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, indivi 'absent_features': individual.absent_features, 'proband_relationship': Individual.RELATIONSHIP_LOOKUP.get(individual.proband_relationship, ''), 'paternal_id': format_id(paternal_ids[0]), - 'paternal_guid': paternal_ids[1], 'maternal_id': format_id(maternal_ids[0]), - 'maternal_guid': maternal_ids[1], } + if include_metadata: + subject_row.update({ + 'paternal_guid': paternal_ids[1], + 'maternal_guid': maternal_ids[1], + }) if airtable_metadata is not None: sequencing = airtable_metadata.get('SequencingProduct') or set() subject_row.update({ - 'dbgap_submission': 'Yes' if has_dbgap_submission else 'No', 'dbgap_study_id': airtable_metadata.get('dbgap_study_id', '') if has_dbgap_submission else '', 'dbgap_subject_id': airtable_metadata.get('dbgap_subject_id', '') if has_dbgap_submission else '', - 'multiple_datasets': 'Yes' if len(sequencing) > 1 or ( - len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No', }) + if include_metadata: + subject_row.update({ + 'dbgap_submission': 'Yes' if has_dbgap_submission else 'No', + 'multiple_datasets': 'Yes' if len(sequencing) > 1 or ( + len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No', + }) if get_additional_individual_fields: 
subject_row.update(get_additional_individual_fields(individual, airtable_metadata)) return subject_row From 438a9ff7342d98a32b7f0db8e501d66a98081ec6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 13:34:43 -0400 Subject: [PATCH 211/736] clean up extra family fields --- seqr/views/utils/anvil_metadata_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 0a31d270a5..321df328cc 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -202,10 +202,13 @@ def parse_anvil_metadata( family_subject_row, saved_variants, *condition_map, set_conditions_for_variants=proband_only_variants, ) - affected_individuals = [individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED] + affected_individuals = [ + individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED + ] if include_metadata else [] + subject_family_row = {k: family_subject_row.pop(k) for k in ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status']} # TODO constant family_row = { - 'family_id': family_subject_row['family_id'], + 'family_id': subject_family_row['family_id'], 'consanguinity': next(( 'Present' if individual.consanguinity else 'None suspected' for individual in family_individuals if individual.consanguinity is not None @@ -235,7 +238,7 @@ def parse_anvil_metadata( ) if individual.id in matchmaker_individuals: subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes' - subject_row.update(family_subject_row) + subject_row.update(subject_family_row) if individual.solve_status: subject_row['solve_status'] = Individual.SOLVE_STATUS_LOOKUP[individual.solve_status] elif individual.affected != Individual.AFFECTED_STATUS_AFFECTED: From d6dccefb0e3a07a3e9b60d92ccb06bef8262deb4 
Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 14:13:25 -0400 Subject: [PATCH 212/736] clean up airtable rows --- seqr/views/apis/report_api.py | 50 ++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 86db286bfb..352948a20c 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -207,9 +207,10 @@ def _add_row(row, family_id, row_type): 'targeted_region_bed_file', 'date_data_generation', 'target_insert_size', 'sequencing_platform', ] EXPERIMENT_COLUMNS = {'analyte_id', 'experiment_sample_id'} -EXPERIMENT_TABLE_COLUMNS = {'experiment_dna_short_read_id', 'sequencing_event_details'} +EXPERIMENT_TABLE_COLUMNS = {'experiment_dna_short_read_id'} EXPERIMENT_TABLE_COLUMNS.update(EXPERIMENT_COLUMNS) EXPERIMENT_TABLE_COLUMNS.update(EXPERIMENT_TABLE_AIRTABLE_FIELDS) +EXPERIMENT_RNA_TABLE = 'experiment_rna_short_read' EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS = [ 'library_prep_type', 'single_or_paired_ends', 'within_site_batch_name', 'RIN', 'estimated_library_size', 'total_reads', 'percent_rRNA', 'percent_mRNA', '5prime3prime_bias', @@ -219,12 +220,14 @@ def _add_row(row, family_id, row_type): EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS) EXPERIMENT_RNA_TABLE_COLUMNS.update([c for c in EXPERIMENT_TABLE_AIRTABLE_FIELDS if not c.startswith('target')]) EXPERIMENT_LOOKUP_TABLE_COLUMNS = {'experiment_id', 'table_name', 'id_in_table', 'participant_id'} +READ_TABLE = 'aligned_dna_short_read' READ_TABLE_AIRTABLE_FIELDS = [ 'aligned_dna_short_read_file', 'aligned_dna_short_read_index_file', 'md5sum', 'reference_assembly', 'mean_coverage', 'alignment_software', 'analysis_details', ] READ_TABLE_COLUMNS = {'aligned_dna_short_read_id', 'experiment_dna_short_read_id'} READ_TABLE_COLUMNS.update(READ_TABLE_AIRTABLE_FIELDS) +READ_RNA_TABLE = 'aligned_rna_short_read' READ_RNA_TABLE_AIRTABLE_ID_FIELDS = 
['aligned_rna_short_read_file', 'aligned_rna_short_read_index_file'] READ_RNA_TABLE_AIRTABLE_FIELDS = [ 'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped', 'percent_unaligned', @@ -233,12 +236,25 @@ def _add_row(row, family_id, row_type): READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_ID_FIELDS) READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_FIELDS) READ_RNA_TABLE_COLUMNS.update(READ_TABLE_AIRTABLE_FIELDS[2:-1]) +READ_SET_TABLE = 'aligned_dna_short_read_set' READ_SET_TABLE_COLUMNS = {'aligned_dna_short_read_set_id', 'aligned_dna_short_read_id'} +CALLED_TABLE = 'called_variants_dna_short_read' CALLED_VARIANT_FILE_COLUMN = 'called_variants_dna_file' CALLED_TABLE_COLUMNS = { 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', CALLED_VARIANT_FILE_COLUMN, 'md5sum', 'caller_software', 'variant_types', 'analysis_details', } +AIRTABLE_TABLE_COLUMNS = { + EXPERIMENT_TABLE: EXPERIMENT_TABLE_COLUMNS, + READ_TABLE: READ_TABLE_COLUMNS, + READ_SET_TABLE: READ_SET_TABLE_COLUMNS, + CALLED_TABLE: CALLED_TABLE_COLUMNS, + EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS, + READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS, +} +RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE} +DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES + GENETIC_FINDINGS_TABLE_COLUMNS = { 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp', 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained', @@ -401,8 +417,7 @@ def _add_row(row, family_id, row_type): phenotype_rows = [] analyte_rows = [] - airtable_rows = [] - airtable_rna_rows = [] + airtable_rows = {table: [] for table in AIRTABLE_TABLE_COLUMNS.keys()} experiment_lookup_rows = [] experiment_ids_by_participant = {} for participant in participant_rows: @@ -416,7 +431,7 @@ def 
_add_row(row, family_id, row_type): data_types = grouped_data_type_individuals[participant['participant_id']] _parse_participant_airtable_rows( participant, airtable_metadata, data_types, experiment_ids_by_participant, - analyte_rows, airtable_rows, airtable_rna_rows, experiment_lookup_rows, + analyte_rows, airtable_rows, experiment_lookup_rows, ) # Add experiment IDs @@ -428,14 +443,7 @@ def _add_row(row, family_id, row_type): ('family', GREGOR_FAMILY_TABLE_COLUMNS, list(family_map.values())), (PHENOTYPE_TABLE, PHENOTYPE_TABLE_COLUMNS, phenotype_rows), ('analyte', ANALYTE_TABLE_COLUMNS, analyte_rows), - (EXPERIMENT_TABLE, EXPERIMENT_TABLE_COLUMNS, airtable_rows), - ('aligned_dna_short_read', READ_TABLE_COLUMNS, airtable_rows), - ('aligned_dna_short_read_set', READ_SET_TABLE_COLUMNS, airtable_rows), - ('called_variants_dna_short_read', CALLED_TABLE_COLUMNS, [ - row for row in airtable_rows if row.get(CALLED_VARIANT_FILE_COLUMN) - ]), - ('experiment_rna_short_read', EXPERIMENT_RNA_TABLE_COLUMNS, airtable_rna_rows), - ('aligned_rna_short_read', READ_RNA_TABLE_COLUMNS, airtable_rna_rows), + *[(table, AIRTABLE_TABLE_COLUMNS[table], rows) for table, rows in airtable_rows.items()], (EXPERIMENT_LOOKUP_TABLE, EXPERIMENT_LOOKUP_TABLE_COLUMNS, experiment_lookup_rows), (FINDINGS_TABLE, GENETIC_FINDINGS_TABLE_COLUMNS, genetic_findings_rows), ] @@ -477,7 +485,7 @@ def _parse_participant_phenotype_rows(participant): def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant, - analyte_rows, airtable_rows, airtable_rna_rows, experiment_lookup_rows): + analyte_rows, airtable_rows, experiment_lookup_rows): has_analyte = False # airtable data for data_type in data_types: @@ -488,7 +496,16 @@ def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, analyte_rows.append({**participant, **row}) if not is_rna: experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id'] - 
(airtable_rna_rows if is_rna else airtable_rows).append(row) + for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES): + if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN): + continue + try: + airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row}) + except KeyError as e: + # TODO + import pdb; pdb.set_trace() + raise e + experiment_lookup_rows.append( {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} ) @@ -798,7 +815,10 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e def _get_row_id(row): - id_col = next(col for col in ['genetic_findings_id', 'participant_id', 'experiment_sample_id', 'family_id'] if col in row) + id_col = next(col for col in [ + 'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id', + 'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 'family_id', + ] if col in row) return row[id_col] From c27890849d4180642cc97d302532fab61c984374 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 14:21:55 -0400 Subject: [PATCH 213/736] clean up analyte table --- seqr/views/apis/report_api.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 352948a20c..8549d5cf4b 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -487,31 +487,26 @@ def _parse_participant_phenotype_rows(participant): def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant, analyte_rows, airtable_rows, experiment_lookup_rows): has_analyte = False + analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS} + participant['participant_id'] = analyte_row['participant_id'] # airtable data for data_type in data_types: if data_type not in airtable_metadata: continue is_rna, row = _get_airtable_row(data_type, airtable_metadata) 
has_analyte = True - analyte_rows.append({**participant, **row}) + analyte_rows.append({**analyte_row, **{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}}) if not is_rna: experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id'] for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES): if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN): continue - try: - airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row}) - except KeyError as e: - # TODO - import pdb; pdb.set_trace() - raise e + airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row}) experiment_lookup_rows.append( {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} ) - # TODO constant - analyte_row = {k: participant.pop(k) for k in ['analyte_id', 'analyte_type', 'primary_biosample', 'tissue_affected_status']} if analyte_row['analyte_id'] and not has_analyte: analyte_rows.append(analyte_row) From 4256bc3b7ff07a236788927feda4e6559081206e Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 31 May 2024 14:22:12 -0400 Subject: [PATCH 214/736] add comma --- ui/shared/components/panel/variants/Annotations.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index eb1ed275e6..68b990b978 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -438,7 +438,7 @@ const svSizeDisplay = (size) => { const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => { const { rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport, - endChrom, CAID + endChrom, CAID, } = variant const mainTranscript = getVariantMainTranscript(variant) From 
9947b335ccf559628f220d18af942ad0db8802c9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 14:33:15 -0400 Subject: [PATCH 215/736] clean up extra findings fields --- seqr/views/apis/report_api.py | 1 - seqr/views/apis/summary_data_api.py | 1 - seqr/views/utils/anvil_metadata_utils.py | 23 +++++++++++++++-------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 8549d5cf4b..b072e7b8b4 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -918,7 +918,6 @@ def _add_row(row, family_id, row_type): elif row_type == DISCOVERY_ROW_TYPE: family = families_by_id[family_id] for variant in row: - del variant['gene_ids'] variant_rows.append({ 'MME': variant.pop('variantId') in participant_mme[variant['participant_id']].get('variant_ids', []), 'phenotype_contribution': 'Full', diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 4ebcc4db3e..811052e52a 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -285,7 +285,6 @@ def _add_row(row, family_id, row_type): family_rows_by_id[family_id] = row elif row_type == DISCOVERY_ROW_TYPE: for i, discovery_row in enumerate(row): - del discovery_row['gene_ids'] participant_id = discovery_row.pop('participant_id') parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items()} parsed_row['num_saved_variants'] = len(row) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 321df328cc..7a6e184c08 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -182,7 +182,7 @@ def parse_anvil_metadata( sample_ids.add(sample.sample_id) saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family( - list(family_data_by_id.keys()), variant_filter=variant_filter, variant_json_fields=variant_json_fields, + 
list(family_data_by_id.keys()), include_metadata, variant_filter=variant_filter, variant_json_fields=variant_json_fields, ) condition_map = _get_condition_map(family_data_by_id.values()) @@ -325,7 +325,7 @@ def _post_process_variant_metadata(v, gene_variants, include_parent_mnvs=False): def _get_parsed_saved_discovery_variants_by_family( - families: Iterable[Family], variant_filter: dict, variant_json_fields: list[str], + families: Iterable[Family], include_metadata: bool, variant_filter: dict, variant_json_fields: list[str], ): tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY) @@ -353,26 +353,32 @@ def _get_parsed_saved_discovery_variants_by_family( phenotype_contribution = 'Uncertain' partial_hpo_terms = '' - variants.append({ + variant = { 'chrom': chrom, 'pos': pos, 'variant_reference_assembly': GENOME_VERSION_LOOKUP[variant_json['genomeVersion']], 'gene_id': gene_id, 'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(), - 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', 'phenotype_contribution': phenotype_contribution, 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, - **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']}, - }) + **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']}, + } + if include_metadata: + variant.update({ + 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), + 'tags': variant.tags, + }) + variants.append(variant) genes_by_id = get_genes(gene_ids) saved_variants_by_family = defaultdict(list) for row in variants: - row[GENE_COLUMN] = genes_by_id.get(row['gene_id'], 
{}).get('geneSymbol') + gene_id = row['gene_id'] if include_metadata else row.pop('gene_id') + row[GENE_COLUMN] = genes_by_id.get(gene_id, {}).get('geneSymbol') family_id = row.pop('family_id') saved_variants_by_family[family_id].append(row) @@ -586,8 +592,9 @@ def _update_conditions(family_subject_row, variants, omim_conditions, mondo_cond c for mim_number in mim_numbers for c in omim_conditions[mim_number][None] if c['chrom'] == v['chrom'] and c['start'] <= v['pos'] <= c['end'] ] + gene_ids = v.pop('gene_ids') for mim_number in mim_numbers: - for gene_id in v['gene_ids']: + for gene_id in gene_ids: variant_conditions += omim_conditions[mim_number][gene_id] if set_conditions_for_variants: From 7c7f19590efe0e5ecdb3b555c5110decea0b7e75 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 15:03:50 -0400 Subject: [PATCH 216/736] clean up metadata --- seqr/views/apis/report_api.py | 3 +++ seqr/views/apis/report_api_tests.py | 13 +------------ seqr/views/utils/anvil_metadata_utils.py | 6 +++--- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index b072e7b8b4..9638710c32 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -854,6 +854,9 @@ def _add_row(row, family_id, row_type): } f.update(known_ids) individuals_ids -= set(known_ids.values()) + individual = proband or next(iter(individuals_by_id.values()), None) + if individual: + f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']}) # TODO constant? 
sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index c99b6b84c1..b7cb51a5fd 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -509,10 +509,7 @@ 'notes': None, 'phenotype_contribution': 'Full', 'partial_contribution_explained': '', - 'phenotype_description': None, - 'pmid_id': None, 'seqr_chosen_consequence': None, - 'solve_status': 'Unsolved', 'svName': None, 'svType': None, 'sv_name': None, @@ -1120,11 +1117,10 @@ def test_family_metadata(self): test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000012_12') self.assertDictEqual(test_row, { 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', 'familyGuid': 'F000012_12', 'family_id': '12', 'displayName': '12', - 'solve_status': 'Unsolved', + 'solve_status': 'Partially solved', 'actual_inheritance': 'unknown', 'condition_id': 'OMIM:616126', 'condition_inheritance': 'Autosomal recessive', @@ -1159,7 +1155,6 @@ def test_family_metadata(self): test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3') self.assertDictEqual(test_row, { 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nåme with uniçøde', 'familyGuid': 'F000003_3', 'family_id': '3', 'displayName': '3', @@ -1221,10 +1216,8 @@ def test_variant_metadata(self): 'known_condition_name': 'mitochondrial disease', 'participant_id': 'HG00731', 'phenotype_contribution': 'Uncertain', - 'phenotype_description': 'microcephaly; seizures', 'pos': 248367227, 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nåme with uniçøde', 'ref': 'TC', 'tags': ['Known gene for phenotype'], 'variant_inheritance': 'paternal', @@ -1249,10 +1242,8 @@ def test_variant_metadata(self): 
'known_condition_name': 'mitochondrial disease', 'notes': 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', 'participant_id': 'HG00731', - 'phenotype_description': 'microcephaly; seizures', 'pos': 1912634, 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nåme with uniçøde', 'ref': 'C', 'tags': ['Known gene for phenotype'], 'transcript': 'ENST00000371839', @@ -1294,7 +1285,6 @@ def test_variant_metadata(self): 'partial_contribution_explained': 'HP:0000501|HP:0000365', 'phenotype_contribution': 'Partial', 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', 'ref': 'TC', 'seqr_chosen_consequence': 'intron_variant', 'tags': ['Tier 1 - Novel gene and phenotype'], @@ -1322,7 +1312,6 @@ def test_variant_metadata(self): 'participant_id': 'NA20889', 'pos': 249045487, 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', 'ref': None, 'svType': 'DEL', 'sv_name': 'DEL:chr1:249045487-249045898', diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 7a6e184c08..65ffa09049 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -353,7 +353,7 @@ def _get_parsed_saved_discovery_variants_by_family( phenotype_contribution = 'Uncertain' partial_hpo_terms = '' - variant = { + parsed_variant = { 'chrom': chrom, 'pos': pos, 'variant_reference_assembly': GENOME_VERSION_LOOKUP[variant_json['genomeVersion']], @@ -367,11 +367,11 @@ def _get_parsed_saved_discovery_variants_by_family( **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']}, } if include_metadata: - variant.update({ + parsed_variant.update({ 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), 'tags': variant.tags, }) - variants.append(variant) + variants.append(parsed_variant) genes_by_id = get_genes(gene_ids) From 
13e2384e5a9381586e3e3a6022b051c465f63a93 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 31 May 2024 15:14:23 -0400 Subject: [PATCH 217/736] update classify --- .../panel/variants/ClinGenVciLink.jsx | 19 ++++++++++++++++--- .../panel/variants/VariantClassify.jsx | 3 ++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/ui/shared/components/panel/variants/ClinGenVciLink.jsx b/ui/shared/components/panel/variants/ClinGenVciLink.jsx index beef527e48..caa9cc2ee8 100644 --- a/ui/shared/components/panel/variants/ClinGenVciLink.jsx +++ b/ui/shared/components/panel/variants/ClinGenVciLink.jsx @@ -11,13 +11,20 @@ class ClinGenVciLink extends React.PureComponent { static propTypes = { hgvsc: PropTypes.string.isRequired, + caid: PropTypes.string, } state = { loading: false, alleleId: null, error: '', - }; + } + + constructor(props) { + super(props) + const { caid } = props + this.state.alleleId = caid + } load = (hgvsc) => { this.setState({ loading: true }) @@ -34,11 +41,17 @@ class ClinGenVciLink extends React.PureComponent { const { hgvsc } = this.props const { alleleId, loading, error } = this.state - return ( - + const clingenInfo = ( +
In ClinGen VCI
{error || (alleleId && )} +
+ ) + + return alleleId ? clingenInfo : ( + + {clingenInfo} ) } diff --git a/ui/shared/components/panel/variants/VariantClassify.jsx b/ui/shared/components/panel/variants/VariantClassify.jsx index 195631e741..64720f8560 100644 --- a/ui/shared/components/panel/variants/VariantClassify.jsx +++ b/ui/shared/components/panel/variants/VariantClassify.jsx @@ -25,6 +25,7 @@ const getButtonBackgroundColor = (classification) => { } const VariantClassify = React.memo(({ variant, familyGuid }) => { + const { CAID } = variant const { hgvsc } = getVariantMainTranscript(variant) const { classify } = variant.acmgClassification || {} const buttonBackgroundColor = getButtonBackgroundColor(classify) @@ -36,7 +37,7 @@ const VariantClassify = React.memo(({ variant, familyGuid }) => { {hgvsc && ( }> - + )} From 419fdd73b0e04d4af870d19979f5a647bd39041c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 15:36:31 -0400 Subject: [PATCH 218/736] fix anvil export --- seqr/views/apis/report_api.py | 20 +++++++++++----- seqr/views/apis/summary_data_api.py | 7 ++++-- seqr/views/utils/anvil_metadata_utils.py | 29 ++++++++++++------------ 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 9638710c32..a00903431d 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -12,7 +12,7 @@ from seqr.utils.middleware import ErrorsWarningsException from seqr.views.utils.airtable_utils import AirtableSession -from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, \ +from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, \ FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE, PARTICIPANT_TABLE, PHENOTYPE_TABLE, \ EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, GENE_COLUMN from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs @@ -112,12 
+112,13 @@ def anvil_export(request, project_guid): project = get_project_and_check_permissions(project_guid, request.user) parsed_rows = defaultdict(list) + family_diseases = {} def _add_row(row, family_id, row_type): if row_type == DISCOVERY_ROW_TYPE: missing_gene_rows = [ '{chrom}-{pos}-{ref}-{alt}'.format(**discovery_row) for discovery_row in row - if not (discovery_row.get('gene_id') or discovery_row.get('svType'))] + if not (discovery_row.get(GENE_COLUMN) or discovery_row.get('svType'))] if missing_gene_rows: raise ErrorsWarningsException( [f'Discovery variant(s) {", ".join(missing_gene_rows)} in family {family_id} have no associated gene']) @@ -146,19 +147,23 @@ def _add_row(row, family_id, row_type): row.update({ 'project_id': row.pop('internal_project_id'), 'solve_state': row.pop('solve_status'), - 'disease_id': row.get('condition_id', '').replace('|', ';'), - 'disease_description': row.get('known_condition_name', '').replace('|', ';'), 'hpo_present': '|'.join([feature['id'] for feature in row.get('features') or []]), 'hpo_absent': '|'.join([feature['id'] for feature in row.get('absent_features') or []]), 'ancestry': row['reported_ethnicity'] or row['reported_race'], }) + if row_type == FAMILY_ROW_TYPE: + family_diseases[row[entity_id_field]] = { + 'disease_id': row.get('condition_id', '').replace('|', ';'), + 'disease_description': row.get('known_condition_name', '').replace('|', ';'), + } parsed_rows[row_type].append(row) max_loaded_date = request.GET.get('loadedBefore') or (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d') parse_anvil_metadata( [project], request.user, _add_row, max_loaded_date=max_loaded_date, include_discovery_sample_id=True, - get_additional_individual_fields=lambda individual, *args: { + get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, *args: { 'congenital_status': Individual.ONSET_AGE_LOOKUP[individual.onset_age] if individual.onset_age else 'Unknown', + 
**anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission), }, get_additional_sample_fields=lambda sample, *args: { 'entity:sample_id': sample.individual.individual_id, @@ -173,6 +178,9 @@ def _add_row(row, family_id, row_type): }}, ) + for row in parsed_rows[SUBJECT_ROW_TYPE]: + row.update(family_diseases[row['family_id']]) + return export_multiple_files([ ['{}_PI_Subject'.format(project.name), SUBJECT_TABLE_COLUMNS, parsed_rows[SUBJECT_ROW_TYPE]], ['{}_PI_Sample'.format(project.name), SAMPLE_TABLE_COLUMNS, parsed_rows[SAMPLE_ROW_TYPE]], @@ -530,7 +538,7 @@ def _get_gregor_airtable_data(participants, user): return airtable_metadata_by_participant -def _get_participant_row(individual, airtable_sample): +def _get_participant_row(individual, airtable_sample, *args): participant = { 'gregor_center': 'BROAD', 'prior_testing': '|'.join([gene.get('gene', gene['comments']) for gene in individual.rejected_genes or []]), diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 811052e52a..2c8663b76d 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -23,7 +23,7 @@ add_individual_hpo_details, INDIVIDUAL_DISPLAY_NAME_EXPR, AIP_TAG_TYPE from seqr.views.utils.permissions_utils import analyst_required, user_is_analyst, get_project_guids_user_can_view, \ login_and_policies_required, get_project_and_check_permissions, get_internal_projects -from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, DISCOVERY_ROW_TYPE +from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, DISCOVERY_ROW_TYPE from seqr.views.utils.variant_utils import get_variants_response, bulk_create_tagged_variants, DISCOVERY_CATEGORY from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL @@ -312,11 +312,14 @@ def _add_row(row, family_id, row_type): projects, request.user, _add_row, 
max_loaded_date=request.GET.get('loadedBefore'), include_metadata=True, omit_airtable=not include_airtable, - get_additional_individual_fields=lambda individual, airtable_metadata: { + get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, maternal_ids, paternal_ids: { 'Collaborator': (airtable_metadata or {}).get('Collaborator'), 'individual_guid': individual.guid, 'disorders': individual.disorders, 'filter_flags': json.dumps(individual.filter_flags) if individual.filter_flags else '', + 'paternal_guid': paternal_ids[1], + 'maternal_guid': maternal_ids[1], + **anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission), }, ) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 65ffa09049..52dee02b61 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -234,7 +234,7 @@ def parse_anvil_metadata( subject_row = _get_subject_row( individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, - format_id, include_metadata, + format_id, ) if individual.id in matchmaker_individuals: subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes' @@ -410,7 +410,7 @@ def _get_transcript_field(field, config, transcript): return value -def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id, include_metadata): +def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id): paternal_ids = individual_ids_map.get(individual.father_id, ('', '')) maternal_ids = individual_ids_map.get(individual.mother_id, ('', '')) subject_row = { @@ -426,28 +426,27 @@ def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, indivi 'paternal_id': format_id(paternal_ids[0]), 'maternal_id': format_id(maternal_ids[0]), 
} - if include_metadata: - subject_row.update({ - 'paternal_guid': paternal_ids[1], - 'maternal_guid': maternal_ids[1], - }) if airtable_metadata is not None: - sequencing = airtable_metadata.get('SequencingProduct') or set() subject_row.update({ 'dbgap_study_id': airtable_metadata.get('dbgap_study_id', '') if has_dbgap_submission else '', 'dbgap_subject_id': airtable_metadata.get('dbgap_subject_id', '') if has_dbgap_submission else '', }) - if include_metadata: - subject_row.update({ - 'dbgap_submission': 'Yes' if has_dbgap_submission else 'No', - 'multiple_datasets': 'Yes' if len(sequencing) > 1 or ( - len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No', - }) if get_additional_individual_fields: - subject_row.update(get_additional_individual_fields(individual, airtable_metadata)) + subject_row.update(get_additional_individual_fields(individual, airtable_metadata, has_dbgap_submission, maternal_ids, paternal_ids)) return subject_row +def anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission): + if airtable_metadata is None: + return {} + sequencing = airtable_metadata.get('SequencingProduct') or set() + return { + 'dbgap_submission': 'Yes' if has_dbgap_submission else 'No', + 'multiple_datasets': 'Yes' if len(sequencing) > 1 or ( + len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No', + } + + def _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metadata, include_metadata, get_additional_sample_fields=None): sample_row = { 'participant_id': participant_id, From c3c85a326630c809f96c938ea5dfa05d051d514b Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 31 May 2024 15:40:47 -0400 Subject: [PATCH 219/736] test fixture --- hail_search/test_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 905ebe3d4d..f8d641b50d 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -63,6 
+63,7 @@ 'mainTranscriptId': None, 'selectedMainTranscriptId': None, '_sort': [1000010146], + 'CAID': 'CA520798130', } GRCH37_VARIANT = { From 7facc258821f2566371801010e7a3a1065b9b1b4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 15:45:51 -0400 Subject: [PATCH 220/736] remove hardcoded column lists --- seqr/views/apis/report_api.py | 63 ++++++++---------------- seqr/views/utils/anvil_metadata_utils.py | 4 +- 2 files changed, 24 insertions(+), 43 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index a00903431d..2463e6c0f0 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -196,17 +196,6 @@ def _add_row(row, family_id, row_type): SMID_FIELD = 'SMID' PARTICIPANT_ID_FIELD = 'CollaboratorParticipantID' COLLABORATOR_SAMPLE_ID_FIELD = 'CollaboratorSampleID' -PARTICIPANT_TABLE_COLUMNS = { - 'participant_id', 'internal_project_id', 'gregor_center', 'consent_code', 'recontactable', 'prior_testing', - 'pmid_id', 'family_id', 'paternal_id', 'maternal_id', 'proband_relationship', - 'sex', 'reported_race', 'reported_ethnicity', 'ancestry_detail', 'solve_status', 'missing_variant_case', - 'age_at_last_observation', 'affected_status', 'phenotype_description', 'age_at_enrollment', -} -GREGOR_FAMILY_TABLE_COLUMNS = {'family_id', 'consanguinity'} -PHENOTYPE_TABLE_COLUMNS = { - 'phenotype_id', 'participant_id', 'term_id', 'presence', 'ontology', 'additional_details', 'onset_age_range', - 'additional_modifiers', -} ANALYTE_TABLE_COLUMNS = { 'analyte_id', 'participant_id', 'analyte_type', 'primary_biosample', 'tissue_affected_status', } @@ -227,7 +216,6 @@ def _add_row(row, family_id, row_type): EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_COLUMNS) EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS) EXPERIMENT_RNA_TABLE_COLUMNS.update([c for c in EXPERIMENT_TABLE_AIRTABLE_FIELDS if not c.startswith('target')]) -EXPERIMENT_LOOKUP_TABLE_COLUMNS = {'experiment_id', 'table_name', 
'id_in_table', 'participant_id'} READ_TABLE = 'aligned_dna_short_read' READ_TABLE_AIRTABLE_FIELDS = [ 'aligned_dna_short_read_file', 'aligned_dna_short_read_index_file', 'md5sum', 'reference_assembly', @@ -252,24 +240,6 @@ def _add_row(row, family_id, row_type): 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', CALLED_VARIANT_FILE_COLUMN, 'md5sum', 'caller_software', 'variant_types', 'analysis_details', } -AIRTABLE_TABLE_COLUMNS = { - EXPERIMENT_TABLE: EXPERIMENT_TABLE_COLUMNS, - READ_TABLE: READ_TABLE_COLUMNS, - READ_SET_TABLE: READ_SET_TABLE_COLUMNS, - CALLED_TABLE: CALLED_TABLE_COLUMNS, - EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS, - READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS, -} -RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE} -DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES - -GENETIC_FINDINGS_TABLE_COLUMNS = { - 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp', - 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained', - 'genetic_findings_id', 'participant_id', 'experiment_id', 'zygosity', 'allele_balance_or_heteroplasmy_percentage', - 'variant_inheritance', 'linked_variant', 'additional_family_members_with_variant', 'method_of_discovery', - 'gene_disease_validity', -} RNA_ONLY = EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS + READ_RNA_TABLE_AIRTABLE_FIELDS + [ 'reference_assembly_uri', 'tissue_affected_status', 'Primary_Biosample'] @@ -296,6 +266,17 @@ def _add_row(row, family_id, row_type): data_type_columns = set(DATA_TYPE_AIRTABLE_COLUMNS) - NO_DATA_TYPE_FIELDS - set(DATA_TYPE_OMIT[data_type]) AIRTABLE_QUERY_COLUMNS.update({f'{field}_{data_type}' for field in data_type_columns}) +AIRTABLE_TABLE_COLUMNS = { + EXPERIMENT_TABLE: EXPERIMENT_TABLE_COLUMNS, + READ_TABLE: READ_TABLE_COLUMNS, + READ_SET_TABLE: READ_SET_TABLE_COLUMNS, + 
CALLED_TABLE: CALLED_TABLE_COLUMNS, + EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS, + READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS, +} +RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE} +DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES + WARN_MISSING_TABLE_COLUMNS = { PARTICIPANT_TABLE: ['recontactable', 'reported_race', 'affected_status', 'phenotype_description', 'age_at_enrollment'], FINDINGS_TABLE: ['known_condition_name'], @@ -447,13 +428,13 @@ def _add_row(row, family_id, row_type): variant['experiment_id'] = experiment_ids_by_participant.get(variant['participant_id']) file_data = [ - (PARTICIPANT_TABLE, PARTICIPANT_TABLE_COLUMNS, participant_rows), - ('family', GREGOR_FAMILY_TABLE_COLUMNS, list(family_map.values())), - (PHENOTYPE_TABLE, PHENOTYPE_TABLE_COLUMNS, phenotype_rows), - ('analyte', ANALYTE_TABLE_COLUMNS, analyte_rows), - *[(table, AIRTABLE_TABLE_COLUMNS[table], rows) for table, rows in airtable_rows.items()], - (EXPERIMENT_LOOKUP_TABLE, EXPERIMENT_LOOKUP_TABLE_COLUMNS, experiment_lookup_rows), - (FINDINGS_TABLE, GENETIC_FINDINGS_TABLE_COLUMNS, genetic_findings_rows), + (PARTICIPANT_TABLE, participant_rows), + ('family', list(family_map.values())), + (PHENOTYPE_TABLE, phenotype_rows), + ('analyte', analyte_rows), + *[(table, rows) for table, rows in airtable_rows.items()], + (EXPERIMENT_LOOKUP_TABLE, experiment_lookup_rows), + (FINDINGS_TABLE, genetic_findings_rows), ] files, warnings = _populate_gregor_files(file_data) @@ -662,7 +643,7 @@ def _populate_gregor_files(file_data): ) files = [] - for file_name, expected_columns, data in file_data: + for file_name, data in file_data: table_config = table_configs.get(file_name) if not table_config: errors.insert(0, f'No data model found for "{file_name}" table') @@ -670,15 +651,13 @@ def _populate_gregor_files(file_data): files.append((file_name, list(table_config.keys()), data)) - expected_columns = {k for d in data for k, v in d.items() if v} # TODO + 
expected_columns = {k for d in data for k, v in d.items() if v} extra_columns = expected_columns.difference(table_config.keys()) if extra_columns: col_summary = ', '.join(sorted(extra_columns)) warnings.insert( 0, f'The following columns are computed for the "{file_name}" table but are missing from the data model: {col_summary}', ) - errors.append(warnings[0]) # TODO - continue invalid_data_type_columns = { col: config['data_type'] for col, config in table_config.items() if config.get('data_type') and config['data_type'] not in DATA_TYPE_VALIDATORS @@ -864,7 +843,7 @@ def _add_row(row, family_id, row_type): individuals_ids -= set(known_ids.values()) individual = proband or next(iter(individuals_by_id.values()), None) if individual: - f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']}) # TODO constant? + f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']}) sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 52dee02b61..5e37f7b742 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -100,6 +100,8 @@ Sample.SAMPLE_TYPE_WGS: 'SR-GS', } +FAMILY_INDIVIDUAL_FIELDS = ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status'] + def _format_hgvs(hgvs, *args): return (hgvs or '').split(':')[-1] @@ -206,7 +208,7 @@ def parse_anvil_metadata( individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED ] if include_metadata else [] - subject_family_row = {k: family_subject_row.pop(k) for k in ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status']} # TODO constant + subject_family_row = 
{k: family_subject_row.pop(k) for k in FAMILY_INDIVIDUAL_FIELDS} family_row = { 'family_id': subject_family_row['family_id'], 'consanguinity': next(( From 576c0c2323be4d99d4227f5aa9056d18ae7a8766 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 16:02:02 -0400 Subject: [PATCH 221/736] correctly include rna airtable field --- seqr/views/apis/report_api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 2463e6c0f0..83e4afb5cb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -226,7 +226,8 @@ def _add_row(row, family_id, row_type): READ_RNA_TABLE = 'aligned_rna_short_read' READ_RNA_TABLE_AIRTABLE_ID_FIELDS = ['aligned_rna_short_read_file', 'aligned_rna_short_read_index_file'] READ_RNA_TABLE_AIRTABLE_FIELDS = [ - 'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped', 'percent_unaligned', + 'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped', + 'percent_unaligned', 'reference_assembly_uri', ] READ_RNA_TABLE_COLUMNS = {'aligned_rna_short_read_id', 'experiment_rna_short_read_id'} READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_ID_FIELDS) @@ -242,7 +243,7 @@ def _add_row(row, family_id, row_type): } RNA_ONLY = EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS + READ_RNA_TABLE_AIRTABLE_FIELDS + [ - 'reference_assembly_uri', 'tissue_affected_status', 'Primary_Biosample'] + 'tissue_affected_status', 'Primary_Biosample'] DATA_TYPE_OMIT = { 'wgs': ['targeted_regions_method'] + RNA_ONLY, 'wes': RNA_ONLY, 'rna': [ 'targeted_regions_method', 'target_insert_size', 'mean_coverage', 'aligned_dna_short_read_file', From 5718146697a650b30e3df22521858bfe7942e268 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 16:08:40 -0400 Subject: [PATCH 222/736] fix typo --- .../transfer_families_to_different_project_tests.py | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/seqr/management/tests/transfer_families_to_different_project_tests.py b/seqr/management/tests/transfer_families_to_different_project_tests.py index 2e02c30ae1..ef38ed69b0 100644 --- a/seqr/management/tests/transfer_families_to_different_project_tests.py +++ b/seqr/management/tests/transfer_families_to_different_project_tests.py @@ -8,12 +8,12 @@ class TransferFamiliesTest(TestCase): fixtures = ['users', '1kg_project'] - def _test_command(self, mock_loger, additional_family, logs): + def _test_command(self, mock_logger, additional_family, logs): call_command( 'transfer_families_to_different_project', '--from-project=R0001_1kg', '--to-project=R0003_test', additional_family, '2', ) - mock_loger.assert_has_calls([ + mock_logger.assert_has_calls([ *logs, mock.call('Updating "Excluded" tags'), mock.call('Updating families'), @@ -38,15 +38,15 @@ def _test_command(self, mock_loger, additional_family, logs): @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @mock.patch('seqr.management.commands.transfer_families_to_different_project.logger.info') - def test_es_command(self, mock_loger): + def test_es_command(self, mock_logger): self._test_command( - mock_loger, additional_family='12', logs=[mock.call('Found 1 out of 2 families. No match for: 12.')] + mock_logger, additional_family='12', logs=[mock.call('Found 1 out of 2 families. 
No match for: 12.')] ) @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', '') @mock.patch('seqr.management.commands.transfer_families_to_different_project.logger.info') - def test_hail_backend_command(self, mock_loger): - searchable_family = self._test_command(mock_loger, additional_family='4', logs=[ + def test_hail_backend_command(self, mock_logger): + searchable_family = self._test_command(mock_logger, additional_family='4', logs=[ mock.call('Found 2 out of 2 families.'), mock.call('Disabled search for 7 samples in the following 1 families: 2'), ]) From 8c2526146f59c96df2a019cc7ab22f086181f148 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 16:54:51 -0400 Subject: [PATCH 223/736] fix invlaid tables test case --- seqr/views/apis/report_api.py | 6 ++---- seqr/views/apis/report_api_tests.py | 8 ++++---- seqr/views/utils/anvil_metadata_utils.py | 14 +++++++++----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 83e4afb5cb..5fe75e60aa 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -397,7 +397,7 @@ def _add_row(row, family_id, row_type): format_id=_format_gregor_id, get_additional_individual_fields=_get_participant_row, post_process_variant=_post_process_gregor_variant, - variant_filter={'alt__isnull': False}, + include_svs=False, airtable_fields=[SMID_FIELD, PARTICIPANT_ID_FIELD, 'Recontactable'], include_mondo=True, proband_only_variants=True, @@ -414,9 +414,6 @@ def _add_row(row, family_id, row_type): phenotype_rows += _parse_participant_phenotype_rows(participant) airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD) - if not airtable_participant_id: - continue - airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {} data_types = grouped_data_type_individuals[participant['participant_id']] _parse_participant_airtable_rows( @@ -479,6 +476,7 @@ def 
_parse_participant_airtable_rows(participant, airtable_metadata, data_types, has_analyte = False analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS} participant['participant_id'] = analyte_row['participant_id'] + # airtable data for data_type in data_types: if data_type not in airtable_metadata: diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index b7cb51a5fd..19164ca14b 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -832,10 +832,10 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat 'The following entries are missing required "proband_relationship" in the "participant" table: Broad_NA19678, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881', 'The following entries have invalid values for "reported_race" in the "participant" table. Allowed values: Asian, White, Black. Invalid values: Broad_NA19675_1 (Middle Eastern or North African)', 'The following entries have invalid values for "age_at_enrollment" in the "participant" table. Allowed values have data type date. Invalid values: Broad_NA19675_1 (18)', - 'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. Invalid values: NA20888 (GRCh38), VCGS_FAM203_621_D2 (GRCh38)', - 'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: VCGS_FAM203_621_D2', - 'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (NA20888, VCGS_FAM203_621_D2)', - 'The following entries have invalid values for "analysis_details" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. 
Invalid values: VCGS_FAM203_621_D2 (DOI:10.5281/zenodo.4469317)', + 'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. Invalid values: Broad_exome_NA20888_1 (GRCh38), Broad_exome_VCGS_FAM203_621_D2_1 (GRCh38)', + 'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: Broad_exome_VCGS_FAM203_621_D2_1', + 'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (Broad_exome_NA20888_1, Broad_exome_VCGS_FAM203_621_D2_1)', + 'The following entries have invalid values for "analysis_details" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. Invalid values: Broad_exome_VCGS_FAM203_621_D2_1 (DOI:10.5281/zenodo.4469317)', 'The following entries have invalid values for "date_data_generation" (from Airtable) in the "experiment_rna_short_read" table. Allowed values have data type float. 
Invalid values: NA19679 (2023-02-11)', 'The following entries are missing required "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_NA19675_1_21_3343353', 'The following entries have non-unique values for "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_exome_VCGS_FAM203_621_D2 (Broad_HG00731_19_1912632, Broad_HG00731_19_1912633, Broad_HG00731_19_1912634, Broad_HG00731_1_248367227)', diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 5e37f7b742..c859f35514 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -160,7 +160,7 @@ def parse_anvil_metadata( get_additional_sample_fields: Callable[[Sample, dict], dict] = None, get_additional_individual_fields: Callable[[Individual, dict], dict] = None, individual_samples: dict[Individual, Sample] = None, individual_data_types: dict[str, Iterable[str]] = None, - airtable_fields: Iterable[str] = None, mme_values: dict = None, variant_filter: dict = None, + airtable_fields: Iterable[str] = None, mme_values: dict = None, include_svs: bool = True, variant_json_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False, include_discovery_sample_id: bool = False, include_mondo: bool = False, include_parent_mnvs: bool = False, @@ -184,7 +184,7 @@ def parse_anvil_metadata( sample_ids.add(sample.sample_id) saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family( - list(family_data_by_id.keys()), include_metadata, variant_filter=variant_filter, variant_json_fields=variant_json_fields, + list(family_data_by_id.keys()), include_metadata, include_svs=include_svs, variant_json_fields=variant_json_fields, ) condition_map = _get_condition_map(family_data_by_id.values()) @@ -327,13 +327,13 @@ def _post_process_variant_metadata(v, 
gene_variants, include_parent_mnvs=False): def _get_parsed_saved_discovery_variants_by_family( - families: Iterable[Family], include_metadata: bool, variant_filter: dict, variant_json_fields: list[str], + families: Iterable[Family], include_metadata: bool, include_svs: dict, variant_json_fields: list[str], ): tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY) project_saved_variants = SavedVariant.objects.filter( varianttag__variant_tag_type__in=tag_types, family__id__in=families, - **(variant_filter or {}), + **({} if include_svs else {'alt__isnull': False}), ).order_by('created_date').distinct().annotate( tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True), partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')), @@ -355,6 +355,10 @@ def _get_parsed_saved_discovery_variants_by_family( phenotype_contribution = 'Uncertain' partial_hpo_terms = '' + variant_fields = ['genotypes'] + if include_svs: + variant_fields += ['svType', 'svName', 'end'] + parsed_variant = { 'chrom': chrom, 'pos': pos, @@ -365,7 +369,7 @@ def _get_parsed_saved_discovery_variants_by_family( 'phenotype_contribution': phenotype_contribution, 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, - **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, + **{k: variant_json.get(k) for k in variant_fields + (variant_json_fields or [])}, **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']}, } if include_metadata: From 0f8f206c83793ce48f0819b9ec5c7be869d72706 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 17:06:33 -0400 Subject: [PATCH 224/736] fix analyte handling --- seqr/views/apis/report_api.py | 29 +++++++++++++++-------------- 1 file 
changed, 15 insertions(+), 14 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 5fe75e60aa..6d18526b7e 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -412,12 +412,16 @@ def _add_row(row, family_id, row_type): experiment_ids_by_participant = {} for participant in participant_rows: phenotype_rows += _parse_participant_phenotype_rows(participant) + analyte = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS} + participant['participant_id'] = analyte['participant_id'] - airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD) - airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {} + if not participant[PARTICIPANT_ID_FIELD]: + continue + + airtable_metadata = airtable_metadata_by_participant.get(participant.pop(PARTICIPANT_ID_FIELD)) or {} data_types = grouped_data_type_individuals[participant['participant_id']] _parse_participant_airtable_rows( - participant, airtable_metadata, data_types, experiment_ids_by_participant, + analyte, airtable_metadata, data_types, experiment_ids_by_participant, analyte_rows, airtable_rows, experiment_lookup_rows, ) @@ -471,32 +475,29 @@ def _parse_participant_phenotype_rows(participant): ] -def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant, +def _parse_participant_airtable_rows(analyte, airtable_metadata, data_types, experiment_ids_by_participant, analyte_rows, airtable_rows, experiment_lookup_rows): has_analyte = False - analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS} - participant['participant_id'] = analyte_row['participant_id'] - # airtable data for data_type in data_types: if data_type not in airtable_metadata: continue is_rna, row = _get_airtable_row(data_type, airtable_metadata) has_analyte = True - analyte_rows.append({**analyte_row, **{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}}) + analyte_rows.append({**analyte, 
**{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}}) if not is_rna: - experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id'] + experiment_ids_by_participant[analyte['participant_id']] = row['experiment_dna_short_read_id'] for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES): if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN): continue airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row}) experiment_lookup_rows.append( - {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} + {'participant_id': analyte['participant_id'], **_get_experiment_lookup_row(is_rna, row)} ) - if analyte_row['analyte_id'] and not has_analyte: - analyte_rows.append(analyte_row) + if analyte['analyte_id'] and not has_analyte: + analyte_rows.append(analyte) def _get_gregor_airtable_data(participants, user): @@ -797,8 +798,8 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e def _get_row_id(row): id_col = next(col for col in [ - 'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id', - 'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 'family_id', + 'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id', 'family_id', + 'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 'aligned_dna_short_read_set_id', 'aligned_rna_short_read_set_id', ] if col in row) return row[id_col] From c56a52030f2d1c263e50aa305f3d6af807eba692 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 17:21:49 -0400 Subject: [PATCH 225/736] remove unused import --- seqr/views/apis/report_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 6d18526b7e..acb45076fb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -14,7 +14,7 @@ from 
seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, \ FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE, PARTICIPANT_TABLE, PHENOTYPE_TABLE, \ - EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, GENE_COLUMN + EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, GENE_COLUMN from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.permissions_utils import analyst_required, get_project_and_check_permissions, \ From 3207fce3572dce069ce9c2cfb52f61ce4f067b58 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 11:25:51 -0400 Subject: [PATCH 226/736] fix migration --- CHANGELOG.md | 1 + ...e_index_file_path.py => 0067_igvsample_index_file_path.py} | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) rename seqr/migrations/{0064_igvsample_index_file_path.py => 0067_igvsample_index_file_path.py} (74%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64626f8078..56a31c866d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Adds index_file_path to IGV Sample model (REQUIRES DB MIGRATION) ## 5/24/24 * Adds external_data to Family model (REQUIRES DB MIGRATION) diff --git a/seqr/migrations/0064_igvsample_index_file_path.py b/seqr/migrations/0067_igvsample_index_file_path.py similarity index 74% rename from seqr/migrations/0064_igvsample_index_file_path.py rename to seqr/migrations/0067_igvsample_index_file_path.py index 437cabb23f..56fd82555f 100644 --- a/seqr/migrations/0064_igvsample_index_file_path.py +++ b/seqr/migrations/0067_igvsample_index_file_path.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.23 on 2024-05-02 20:48 +# Generated by Django 3.2.23 on 2024-06-03 15:25 from django.db import migrations, models @@ -6,7 +6,7 @@ class 
Migration(migrations.Migration): dependencies = [ - ('seqr', '0063_dynamicanalysisgroup'), + ('seqr', '0066_family_post_discovery_mondo_id'), ] operations = [ From ac9ac079c5de55d2017ad32a20efa4048be48773 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 11:32:53 -0400 Subject: [PATCH 227/736] allow index URI for non-DRS alignment files --- seqr/views/apis/igv_api.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 822ebcc124..dff66d2e4d 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -32,13 +32,14 @@ def _process_alignment_records(rows, num_id_cols=1, **kwargs): parsed_records = defaultdict(list) for row in rows: row_id = row[0] if num_id_cols == 1 else tuple(row[:num_id_cols]) + file_path = row[num_id_cols] sample_id = None index_file_path = None if len(row) > num_cols: - if _is_drs_uri_path(row[num_cols]): - index_file_path = row[num_cols] - else: + if file_path.endswith(GCNV_FILE_EXTENSIONS): sample_id = row[num_cols] + else: + index_file_path = row[num_cols] parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': sample_id, 'indexFilePath': index_file_path}) return parsed_records @@ -141,6 +142,8 @@ def _get_valid_matched_individuals(individual_dataset_mapping): ('bed.gz', IgvSample.SAMPLE_TYPE_GCNV), ] +GCNV_FILE_EXTENSIONS = tuple(ext for ext, sample_type in SAMPLE_TYPE_MAP if sample_type == IgvSample.SAMPLE_TYPE_GCNV) + @pm_or_data_manager_required def update_individual_igv_sample(request, individual_guid): From bcaba7c1eea1a647249c4a9258776d97de55c91c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 15:12:26 -0400 Subject: [PATCH 228/736] debug code --- hail_search/queries/base.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index efacff45bd..92b6c2367d 100644 --- a/hail_search/queries/base.py +++ 
b/hail_search/queries/base.py @@ -297,11 +297,14 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): - project_ht = self._read_table( - f'projects/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing else None, - ) + try: + project_ht = self._read_table( + f'projects/{project_guid}.ht', + use_ssd_dir=True, + skip_missing_field='family_entries' if skip_all_missing else None, + ) + except Exception as e: + project_ht = None if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) @@ -1078,7 +1081,7 @@ def gene_counts(self): def lookup_variants(self, variant_ids, include_project_data=False, **kwargs): self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids) - ht = self._read_table('annotations.ht', drop_globals=['paths', 'versions']) + ht = self._read_table('annotations_vep_110.ht', drop_globals=['paths', 'versions']) ht = ht.filter(hl.is_defined(ht[XPOS])) annotation_fields = self.annotation_fields(include_genotype_overrides=False) From 2c7d85ad6ca9eb574a5e29173b0c2effe7f635ca Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 15:34:31 -0400 Subject: [PATCH 229/736] debug code --- hail_search/queries/base.py | 6 ++++-- hail_search/queries/ont_snv_indel.py | 1 + hail_search/queries/snv_indel.py | 1 + hail_search/queries/snv_indel_37.py | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 92b6c2367d..37d6ef87af 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -35,6 +35,8 @@ def _to_camel_case(snake_case_str): class BaseHailTableQuery(object): + ANNS_HT = 'annotations.ht' + DATA_TYPE = None KEY_FIELD = None LOADED_GLOBALS = None @@ -90,7 +92,7 @@ class 
BaseHailTableQuery(object): @classmethod def load_globals(cls): - ht_path = cls._get_table_path('annotations.ht') + ht_path = cls._get_table_path(cls.ANNS_HT) ht_globals = hl.eval(hl.read_table(ht_path).globals.select(*cls.GLOBALS)) cls.LOADED_GLOBALS = {k: ht_globals[k] for k in cls.GLOBALS} @@ -1081,7 +1083,7 @@ def gene_counts(self): def lookup_variants(self, variant_ids, include_project_data=False, **kwargs): self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids) - ht = self._read_table('annotations_vep_110.ht', drop_globals=['paths', 'versions']) + ht = self._read_table(self.ANNS_HT, drop_globals=['paths', 'versions']) ht = ht.filter(hl.is_defined(ht[XPOS])) annotation_fields = self.annotation_fields(include_genotype_overrides=False) diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py index dc99ad8e18..fac3d12d4d 100644 --- a/hail_search/queries/ont_snv_indel.py +++ b/hail_search/queries/ont_snv_indel.py @@ -7,6 +7,7 @@ class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery): DATA_TYPE = 'ONT_SNV_INDEL' + ANNS_HT = 'annotations.ht' CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index a95890e038..5b42570aeb 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -11,6 +11,7 @@ class SnvIndelHailTableQuery(MitoHailTableQuery): DATA_TYPE = 'SNV_INDEL' + ANNS_HT = 'annotations_vep_110.ht' GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']} QUALITY_FILTER_FORMAT = { diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index d43b92cbe6..3c0a9f2aa5 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -5,7 +5,7 @@ class SnvIndelHailTableQuery37(SnvIndelHailTableQuery): - + ANNS_HT = 'annotations.ht' GENOME_VERSION = GENOME_VERSION_GRCh37 PREDICTION_FIELDS_CONFIG = 
SnvIndelHailTableQuery.PREDICTION_FIELDS_CONFIG_ALL_BUILDS LIFTOVER_ANNOTATION_FIELDS = {} From cf479bc8686ac7aa41f0bc7a721e8dfe7417428c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:02:27 -0400 Subject: [PATCH 230/736] nested struct support --- hail_search/queries/base.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 37d6ef87af..49ea85afba 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -184,22 +184,28 @@ def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=Non return cls._enum_field(field, value, enum, **kwargs) - @staticmethod - def _enum_field(field_name, value, enum, ht_globals=None, annotate_value=None, format_value=None, drop_fields=None, enum_keys=None, include_version=False, **kwargs): + @classmethod + def _enum_field(cls, field_name, value, enum, ht_globals=None, annotate_value=None, format_value=None, drop_fields=None, enum_keys=None, include_version=False, **kwargs): annotations = {} drop = [] + (drop_fields or []) value_keys = value.keys() for field in (enum_keys or enum.keys()): field_enum = enum[field] + if field == 'utrannotator': + field = 'utrrannotator' + is_nested_struct = field in value_keys is_array = f'{field}_ids' in value_keys - value_field = f"{field}_id{'s' if is_array else ''}" - drop.append(value_field) - enum_array = hl.array(field_enum) - if is_array: - annotations[f'{field}s'] = value[value_field].map(lambda v: enum_array[v]) + if is_nested_struct: + annotations[field] = cls._enum_field(field, value[field], field_enum, format_value=format_value) else: - annotations[field] = enum_array[value[value_field]] + value_field = f"{field}_id{'s' if is_array else ''}" + drop.append(value_field) + enum_array = hl.array(field_enum) + if is_array: + annotations[f'{field}s'] = value[value_field].map(lambda v: enum_array[v]) + else: + annotations[field] = 
enum_array[value[value_field]] if include_version: annotations['version'] = ht_globals['versions'][field_name] From e1b154c885d2f5be355c30b9b2530d212abada9f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:08:54 -0400 Subject: [PATCH 231/736] support new loftee format --- .../components/panel/variants/Annotations.jsx | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 3af864824a..14d8d908ed 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -435,29 +435,21 @@ const svSizeDisplay = (size) => { return `${(size / 1000000).toFixed(2) / 1}Mb` } -const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => { - const { - rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport, - endChrom, - } = variant - const mainTranscript = getVariantMainTranscript(variant) - - const isLofNagnag = mainTranscript.isLofNagnag || mainTranscript.lofFlags === 'NAGNAG_SITE' - const lofFilters = mainTranscript.lofFilters || ( - mainTranscript.lof === 'LC' && mainTranscript.lofFilter && mainTranscript.lofFilter.split(/&|,/g) - ) - const lofDetails = (lofFilters || isLofNagnag) ? [ - ...(lofFilters ? [...new Set(lofFilters)] : []).map((lofFilterKey) => { - const lofFilter = LOF_FILTER_MAP[lofFilterKey] || { message: lofFilterKey } +const getLofDetails = ({ isLofNagnag, lofFilters, lofFilter, lofFlags, lof }) => { + const isNagnag = isLofNagnag || lofFlags === 'NAGNAG_SITE' + const filters = lofFilters || (lof === 'LC' && lofFilter && lofFilter.split(/&|,/g)) + return (filters || isNagnag) ? [ + ...(filters ? [...new Set(filters)] : []).map((lofFilterKey) => { + const filter = LOF_FILTER_MAP[lofFilterKey] || { message: lofFilterKey } return (
- {`LOFTEE: ${lofFilter.title}`} + {`LOFTEE: ${filter.title}`}
- {lofFilter.message} + {filter.message}
) }), - isLofNagnag ? ( + isNagnag ? (
LOFTEE: NAGNAG site
@@ -465,6 +457,15 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
) : null, ] : null +} + +const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => { + const { + rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport, + endChrom, + } = variant + const mainTranscript = getVariantMainTranscript(variant) + const lofDetails = getLofDetails(mainTranscript.loftee || mainTranscript) const transcriptPopupProps = mainTranscript.transcriptId && { content: , From ed52cddc684f457e2fc482d56e0eb98fe4f4fc74 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:33:02 -0400 Subject: [PATCH 232/736] first pass intron/exon --- ui/shared/components/panel/variants/Transcripts.jsx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index fc1a0523f7..54fb2bb1f4 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -114,8 +114,9 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai Biotype {transcript.biotype}
- cDNA Position - {transcript.cdnaPosition} + Intron/Exon + {transcript.intron && `Intron ${transcript.intron.index} of ${transcript.intron.total}`} + {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index} of ${transcript.exon.total}`}
From b81cbdbe42d04a5ca9e38b5ac91775f4b0af2b42 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:56:05 -0400 Subject: [PATCH 233/736] show AlphaMissense --- ui/shared/components/panel/variants/Predictions.jsx | 10 +++++++++- ui/shared/utils/constants.js | 7 +++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index d6a305a145..b55b9e91d3 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -5,7 +5,7 @@ import { connect } from 'react-redux' import { Icon, Transition, Popup } from 'semantic-ui-react' import { getGenesById } from 'redux/selectors' -import { ORDERED_PREDICTOR_FIELDS, coloredIcon, predictorColorRanges, predictionFieldValue, getVariantMainGeneId } from 'shared/utils/constants' +import { ORDERED_PREDICTOR_FIELDS, coloredIcon, predictorColorRanges, predictionFieldValue, getVariantMainGeneId, getVariantMainTranscript } from 'shared/utils/constants' import { snakecaseToTitlecase } from 'shared/utils/stringUtils' import { HorizontalSpacer } from '../../Spacers' import { ButtonLink } from '../../StyledComponents' @@ -111,6 +111,14 @@ class Predictions extends React.PureComponent { gene.primateAi.percentile75.toPrecision(3), undefined], } } + const mainTranscript = getVariantMainTranscript(variant) + if (mainTranscript?.alphamissense.pathogenicity) { + genePredictors.alphamissense = { + field: 'alphamissense', + fieldValue: mainTranscript.alphamissense.pathogenicity, + thresholds: [0.34, 0.34, 0.564, 0.564], + } + } const predictorFields = getPredictorFields(variant, predictions, genePredictors) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 4d107cc89a..988938fef7 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1446,6 +1446,7 @@ export const ORDERED_PREDICTOR_FIELDS = [ thresholds: 
[undefined, undefined, 2.18, 4, undefined], requiresCitation: true, }, + { field: 'alphamissense', fieldTitle: 'AlphaMissense', displayOnly: true }, { field: 'haplogroup_defining', indicatorMap: { Y: { color: 'green', value: '' } } }, { field: 'mitotip', indicatorMap: MITOTIP_MAP, fieldTitle: 'MitoTIP' }, { field: 'hmtvar', thresholds: [undefined, undefined, 0.35, 0.35, undefined], fieldTitle: 'HmtVar' }, @@ -1453,9 +1454,9 @@ export const ORDERED_PREDICTOR_FIELDS = [ export const coloredIcon = color => React.createElement(color.startsWith('#') ? ColoredIcon : Icon, { name: 'circle', size: 'small', color }) export const predictionFieldValue = ( - predictions, { field, thresholds, reverseThresholds, indicatorMap, infoField, infoTitle }, + predictions, { field, fieldValue, thresholds, reverseThresholds, indicatorMap, infoField, infoTitle }, ) => { - let value = predictions[field] + let value = fieldValue || predictions[field] if (value === null || value === undefined) { return { value } } @@ -1487,6 +1488,8 @@ export const predictorColorRanges = (thresholds, requiresCitation, reverseThresh range = ` >= ${thresholds[i - 1]}` } else if (prevUndefined) { range = ` < ${thresholds[i]}` + } else if (thresholds[i - 1] === thresholds[i]) { + return null } else { range = ` ${thresholds[i - 1]} - ${thresholds[i]}` } From f22066919d4316032a952fa427a27398646d4112 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:58:00 -0400 Subject: [PATCH 234/736] fix intron exon dispaly --- ui/shared/components/panel/variants/Transcripts.jsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 54fb2bb1f4..56cbb420c1 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -115,8 +115,8 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai 
{transcript.biotype}
Intron/Exon - {transcript.intron && `Intron ${transcript.intron.index} of ${transcript.intron.total}`} - {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index} of ${transcript.exon.total}`} + {transcript.intron && `Intron ${transcript.intron.index}/${transcript.intron.total}`} + {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index}/${transcript.exon.total}`}
From f70b59c8090c3e267c6c298297b05495d32814b3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 11:04:39 -0400 Subject: [PATCH 235/736] remove utrannotator hardcode fix --- hail_search/queries/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 49ea85afba..78523f00de 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -191,8 +191,6 @@ def _enum_field(cls, field_name, value, enum, ht_globals=None, annotate_value=No value_keys = value.keys() for field in (enum_keys or enum.keys()): field_enum = enum[field] - if field == 'utrannotator': - field = 'utrrannotator' is_nested_struct = field in value_keys is_array = f'{field}_ids' in value_keys From 2c5b484be1db7ee01382a90190f8a97f1d89ddb1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 11:46:08 -0400 Subject: [PATCH 236/736] show utrannotator --- .../components/panel/variants/Annotations.jsx | 73 ++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 14d8d908ed..771e96dd10 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -3,7 +3,7 @@ import PropTypes from 'prop-types' import { connect } from 'react-redux' import { NavLink } from 'react-router-dom' import styled from 'styled-components' -import { Popup, Label, Icon } from 'semantic-ui-react' +import { Popup, Label, Icon, Table } from 'semantic-ui-react' import { getGenesById, @@ -36,6 +36,7 @@ import { import { GENOME_VERSION_37, GENOME_VERSION_38, getVariantMainTranscript, SVTYPE_LOOKUP, SVTYPE_DETAILS, SCREEN_LABELS, } from '../../../utils/constants' +import { camelcaseToTitlecase } from '../../../utils/stringUtils' const OverlappedIntervalLabels = React.memo(({ groupedIntervals, variant, getOverlapArgs, getLabels }) => { const 
chromIntervals = groupedIntervals[variant.chrom] @@ -459,6 +460,60 @@ const getLofDetails = ({ isLofNagnag, lofFilters, lofFilter, lofFlags, lof }) => ] : null } +// Adapted from https://github.com/ImperialCardioGenetics/UTRannotator/blob/master/README.md#the-detailed-annotation-for-each-consequence +const UTR_ANNOTATOR_DESCRIPTIONS = { + AltStop: 'Whether there is an alternative stop codon downstream within 5’ UTR', + AltStopDistanceToCDS: 'The distance between the alternative stop codon (if exists) and CDS', + CapDistanceToStart: 'The distance (number of nucleotides) to the start of 5’UTR', + DistanceToCDS: 'The distance (number of nucleotides) to CDS', + DistanceToStop: 'The distance (number of nucleotides) to the nearest stop codon (scanning through both the 5’UTR and its downstream CDS)', + Evidence: 'Whether the disrupted uORF has any translation evidence', + FrameWithCDS: 'The frame of the uORF with respect to CDS, described by inFrame or outOfFrame', + KozakContext: 'The Kozak context sequence', + KozakStrength: 'The Kozak strength, described by one of the following values: Weak, Moderate or Strong', + StartDistanceToCDS: 'The distance between the disrupting uORF and CDS', + alt_type: 'The type of uORF with the alternative allele, described by one of following: uORF, inframe_oORF or OutOfFrame_oORF', + alt_type_length: 'The length of uORF with the alt allele', + newSTOPDistanceToCDS: 'The distance between the gained uSTOP to the start of the CDS', + ref_StartDistanceToCDS: 'The distance between the uAUG of the disrupting uORF to CDS', + ref_type: 'The type of uORF with the reference allele, described by one of following: uORF, inframe_oORF or OutOfFrame_oORF', + ref_type_length: 'The length of uORF with the reference allele', + type: 'The type of of 5’ UTR ORF, described by one of the following: uORF(with a stop codon in 5’UTR), inframe_oORF (inframe and overlapping with CDS),OutOfFrame_oORF (out of frame and overlapping with CDS)', +} + +const 
UtrAnnotatorDetail = ({ fiveutrConsequence, fiveutrAnnotation, ...counts }) => ( + + + + + + + {Object.entries(counts).map(([field, value]) => ( + + + + + ))} + {Object.entries(fiveutrAnnotation).filter(e => e[1] !== null).map(([field, value]) => ( + + + {camelcaseToTitlecase(field)} + {UTR_ANNOTATOR_DESCRIPTIONS[field] && ( + } content={UTR_ANNOTATOR_DESCRIPTIONS[field]} flowing /> + )} + + + + ))} + +
+) + +UtrAnnotatorDetail.propTypes = { + fiveutrConsequence: PropTypes.string, + fiveutrAnnotation: PropTypes.object, +} + const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => { const { rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport, @@ -571,6 +626,22 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} + {mainTranscript.utrannotator?.fiveutrConsequence && ( +
+ UTRAnnotator:   + + {mainTranscript.utrannotator.fiveutrConsequence.replace('5_prime_UTR_', '').replace('_variant', '').replace(/_/g, ' ')} + + } + > + + +
+ )} {variant.screenRegionType && (
From 8ecca190f8d00f9bab018d7272cf9361ebef5a3a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 12:31:04 -0400 Subject: [PATCH 237/736] shared array formatting --- hail_search/queries/base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 78523f00de..29a18214db 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -76,7 +76,6 @@ class BaseHailTableQuery(object): 'transcripts': { 'response_key': 'transcripts', 'empty_array': True, - 'format_value': lambda value: value.rename({k: _to_camel_case(k) for k in value.keys()}), 'format_array_values': lambda values, *args: values.group_by(lambda t: t.geneId), }, } @@ -168,6 +167,10 @@ def _format_enum_response(self, k, enum): value = lambda r: self._format_enum(r, k, enum, ht_globals=self._globals, **enum_config) return enum_config.get('response_key', _to_camel_case(k)), value + @staticmethod + def _camelcase_value(value): + return value.rename({k: _to_camel_case(k) for k in value.keys()}) + @classmethod def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=None, **kwargs): if hasattr(r, f'{field}_id'): @@ -177,7 +180,7 @@ def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=Non if hasattr(value, 'map'): if empty_array: value = hl.or_else(value, hl.empty_array(value.dtype.element_type)) - value = value.map(lambda x: cls._enum_field(field, x, enum, **kwargs)) + value = value.map(lambda x: cls._enum_field(field, x, enum, **kwargs, format_value=cls._camelcase_value)) if format_array_values: value = format_array_values(value, r) return value From 6bc8099825e20f0968e9472c74c3ea8f6398b40f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 14:34:45 -0400 Subject: [PATCH 238/736] shared consequence details ui --- .../components/panel/variants/Predictions.jsx | 2 +- .../components/panel/variants/Transcripts.jsx | 192 +++++++++++------- 2 files 
changed, 114 insertions(+), 80 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index b55b9e91d3..365f508580 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -112,7 +112,7 @@ class Predictions extends React.PureComponent { } } const mainTranscript = getVariantMainTranscript(variant) - if (mainTranscript?.alphamissense.pathogenicity) { + if (mainTranscript?.alphamissense?.pathogenicity) { genePredictors.alphamissense = { field: 'alphamissense', fieldValue: mainTranscript.alphamissense.pathogenicity, diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 56cbb420c1..10275d5212 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -10,6 +10,7 @@ import { VerticalSpacer } from '../../Spacers' import DispatchRequestButton from '../../buttons/DispatchRequestButton' import ShowGeneModal from '../../buttons/ShowGeneModal' import { ProteinSequence, TranscriptLink } from './VariantUtils' +import { toCamelcase, camelcaseToTitlecase } from '../../../utils/stringUtils' const AnnotationSection = styled.div` display: inline-block; @@ -24,6 +25,54 @@ const AnnotationLabel = styled.small` const HeaderLabel = AnnotationLabel.withComponent('span') +const AnnotationDetail = ({ consequence, title, getContent }) => ( + + {title} + {getContent ? getContent(consequence) : consequence[toCamelcase(title)]} +
+
+) + +AnnotationDetail.propTypes = { + consequence: PropTypes.object.isRequired, + title: PropTypes.string.isRequired, + getContent: PropTypes.func, +} + +export const ConsequenceDetails = ({ consequences, variant, idField, idDetails, annotationSections, ...props }) => ( + + + {consequences.map(c => ( + + + + {idDetails && idDetails(c, variant, props)} + + + {c.majorConsequence || c.consequenceTerms.join('; ')} + + + {annotationSections.map(([field1, field2]) => ( + + + {field2 && } + + ))} + + + ))} + +
+) + +ConsequenceDetails.propTypes = { + consequences: PropTypes.arrayOf(PropTypes.object).isRequired, + idField: PropTypes.string.isRequired, + variant: PropTypes.object, + idDetails: PropTypes.func, + annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)), +} + const TRANSCRIPT_LABELS = [ { content: 'Canonical', @@ -42,7 +91,62 @@ const TRANSCRIPT_LABELS = [ }, ] -const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMainTranscript, project }) => ( +const transcriptIdDetails = (transcript, variant, { transcriptsById, project, updateMainTranscript }) => ( +
+ {transcriptsById[transcript.transcriptId]?.refseqId && ( + + )} + {TRANSCRIPT_LABELS.map(({ shouldShow, ...labelProps }) => ( + shouldShow(transcript, transcriptsById) && ( +
+) + +const ANNOTATION_SECTIONS = [ + [{ title: 'Codons' }, { title: 'Amino Acids' }], + [ + { title: 'Biotype' }, + { + title: 'Intron/Exon', + getContent: c => ['intron', 'exon'].filter(f => c[f]).map(f => `${camelcaseToTitlecase(f)} ${c[f].index}/${c[f].total}`).join(', '), + }, + ], + [ + { title: 'HGVS.C', getContent: transcript => transcript.hgvsc && }, + { title: 'HGVS.P', getContent: transcript => transcript.hgvsp && }, + ], +] + +const Transcripts = React.memo(({ variant, genesById, ...props }) => ( variant.transcripts && Object.entries(variant.transcripts).sort((transcriptsA, transcriptsB) => ( Math.min(...transcriptsA[1].map(t => t.transcriptRank)) - Math.min(...transcriptsB[1].map(t => t.transcriptRank)) )).map(([geneId, geneTranscripts]) => ( @@ -54,84 +158,14 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai subheader={`Gene Id: ${geneId}`} /> - - - {geneTranscripts.map(transcript => ( - - - - {transcriptsById[transcript.transcriptId]?.refseqId && ( - - )} -
- {TRANSCRIPT_LABELS.map(({ shouldShow, ...labelProps }) => ( - shouldShow(transcript, transcriptsById) && ( -
-
- - {transcript.majorConsequence} - - - - Codons - {transcript.codons} -
- Amino Acids - {transcript.aminoAcids} -
-
- - Biotype - {transcript.biotype} -
- Intron/Exon - {transcript.intron && `Intron ${transcript.intron.index}/${transcript.intron.total}`} - {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index}/${transcript.exon.total}`} -
-
- - HGVS.C - {transcript.hgvsc && } -
- HGVS.P - {transcript.hgvsp && } -
-
-
-
- ))} -
-
+
From 2c09ccb38bea46509fcc2a6b1056be7b5f557aed Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 14:57:12 -0400 Subject: [PATCH 239/736] show regluatory features --- .../components/panel/variants/Annotations.jsx | 25 ++++++++++++++++++- .../components/panel/variants/Transcripts.jsx | 9 ++++--- .../panel/variants/VariantUtils.jsx | 6 ++--- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 771e96dd10..126af1af33 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -22,7 +22,7 @@ import Modal from '../../modal/Modal' import { ButtonLink, HelpIcon } from '../../StyledComponents' import RnaSeqJunctionOutliersTable from '../../table/RnaSeqJunctionOutliersTable' import { getOtherGeneNames } from '../genes/GeneDetail' -import Transcripts from './Transcripts' +import Transcripts, { ConsequenceDetails } from './Transcripts' import VariantGenes, { GeneLabelContent, omimPhenotypesDetail } from './VariantGene' import { getLocus, @@ -185,6 +185,9 @@ VariantPosition.propTypes = { svType: PropTypes.string, } +const REGULATORY_FEATURE_SECTIONS = [[{ title: 'Biotype' }]] +const REGULATORY_FEATURE_LINK = { ensemblEntity: 'Regulation', ensemblKey: 'rf' } + const LOF_FILTER_MAP = { END_TRUNC: { title: 'End Truncation', message: 'This variant falls in the last 5% of the transcript' }, INCOMPLETE_CDS: { title: 'Incomplete CDS', message: 'The start or stop codons are not known for this transcript' }, @@ -626,6 +629,26 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} + {variant.sortedRegulatoryFeatureConsequences && ( +
+ Regulatory Feature:   + {variant.sortedRegulatoryFeatureConsequences[0].consequenceTerms[0].replace(/_/g, ' ')} + } + > + + +
+ )} {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 10275d5212..8b8a32c1af 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -39,13 +39,15 @@ AnnotationDetail.propTypes = { getContent: PropTypes.func, } -export const ConsequenceDetails = ({ consequences, variant, idField, idDetails, annotationSections, ...props }) => ( +export const ConsequenceDetails = ( + { consequences, variant, idField, idDetails, annotationSections, ensemblLink = {}, ...props }, +) => ( {consequences.map(c => ( - + {idDetails && idDetails(c, variant, props)} @@ -53,7 +55,7 @@ export const ConsequenceDetails = ({ consequences, variant, idField, idDetails, {annotationSections.map(([field1, field2]) => ( - + {field2 && } @@ -71,6 +73,7 @@ ConsequenceDetails.propTypes = { variant: PropTypes.object, idDetails: PropTypes.func, annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)), + ensemblLink: PropTypes.object, } const TRANSCRIPT_LABELS = [ diff --git a/ui/shared/components/panel/variants/VariantUtils.jsx b/ui/shared/components/panel/variants/VariantUtils.jsx index 663ff8b58c..d50b0984c5 100644 --- a/ui/shared/components/panel/variants/VariantUtils.jsx +++ b/ui/shared/components/panel/variants/VariantUtils.jsx @@ -10,10 +10,10 @@ const SequenceContainer = styled.span` color: ${props => props.color || 'inherit'}; ` -export const TranscriptLink = styled.a.attrs(({ variant, transcript }) => ({ +export const TranscriptLink = styled.a.attrs(({ variant, transcript, idField = 'transcriptId', ensemblEntity = 'Transcript', ensemblKey = 't' }) => ({ target: '_blank', - href: `http://${variant.genomeVersion === GENOME_VERSION_37 ? 'grch37' : 'useast'}.ensembl.org/Homo_sapiens/Transcript/Summary?t=${transcript.transcriptId}`, - children: transcript.hgvsc?.startsWith(transcript.transcriptId) ? 
transcript.hgvsc.split(':')[0] : transcript.transcriptId, + href: `http://${variant.genomeVersion === GENOME_VERSION_37 ? 'grch37' : 'useast'}.ensembl.org/Homo_sapiens/${ensemblEntity}/Summary?${ensemblKey}=${transcript[idField]}`, + children: transcript.hgvsc?.startsWith(transcript.transcriptId) ? transcript.hgvsc.split(':')[0] : transcript[idField], }))` font-size: 1.3em; font-weight: normal; From ae6550c57d5edd7a0a1aec22e5bc2c1c9cbff4d6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 17:41:22 -0400 Subject: [PATCH 240/736] show motif features --- .../components/panel/variants/Annotations.jsx | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 126af1af33..4f5b65db72 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -185,8 +185,11 @@ VariantPosition.propTypes = { svType: PropTypes.string, } -const REGULATORY_FEATURE_SECTIONS = [[{ title: 'Biotype' }]] const REGULATORY_FEATURE_LINK = { ensemblEntity: 'Regulation', ensemblKey: 'rf' } +const CONSEQUENCE_FEATURES = [ + { name: 'Regulatory', annotationSections: [[{ title: 'Biotype' }]] }, + { name: 'Motif', annotationSections: [] }, +].map(f => ({ ...f, field: `sorted${f.name}FeatureConsequences`, idField: `${f.name.toLowerCase()}FeatureId` })) const LOF_FILTER_MAP = { END_TRUNC: { title: 'End Truncation', message: 'This variant falls in the last 5% of the transcript' }, @@ -629,26 +632,23 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} - {variant.sortedRegulatoryFeatureConsequences && ( + {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => (
- Regulatory Feature:   + {`${name} Feature: `} {variant.sortedRegulatoryFeatureConsequences[0].consequenceTerms[0].replace(/_/g, ' ')} - } + modalName={`${variant.variantId}-${name}`} + title={`${name} Feature Consequences`} + trigger={{variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}} >
- )} + ))} {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   From e319f9093d42b6f03358580567178e27bb637621 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 10:55:40 -0400 Subject: [PATCH 241/736] Revert "debug code" This reverts commit bcaba7c1eea1a647249c4a9258776d97de55c91c. --- hail_search/queries/base.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 29a18214db..9d9738c415 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -35,8 +35,6 @@ def _to_camel_case(snake_case_str): class BaseHailTableQuery(object): - ANNS_HT = 'annotations.ht' - DATA_TYPE = None KEY_FIELD = None LOADED_GLOBALS = None @@ -306,14 +304,11 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): - try: - project_ht = self._read_table( - f'projects/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing else None, - ) - except Exception as e: - project_ht = None + project_ht = self._read_table( + f'projects/{project_guid}.ht', + use_ssd_dir=True, + skip_missing_field='family_entries' if skip_all_missing else None, + ) if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) @@ -1090,7 +1085,7 @@ def gene_counts(self): def lookup_variants(self, variant_ids, include_project_data=False, **kwargs): self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids) - ht = self._read_table(self.ANNS_HT, drop_globals=['paths', 'versions']) + ht = self._read_table('annotations.ht', drop_globals=['paths', 'versions']) ht = ht.filter(hl.is_defined(ht[XPOS])) annotation_fields = self.annotation_fields(include_genotype_overrides=False) From 1d2fa049eb25f6462dc4d4af3120d9430541d6ca Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 10:57:44 
-0400 Subject: [PATCH 242/736] Revert "debug code" This reverts commit 2c7d85ad6ca9eb574a5e29173b0c2effe7f635ca. --- hail_search/queries/base.py | 2 +- hail_search/queries/ont_snv_indel.py | 1 - hail_search/queries/snv_indel.py | 1 - hail_search/queries/snv_indel_37.py | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 9d9738c415..341fc8a6a8 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -89,7 +89,7 @@ class BaseHailTableQuery(object): @classmethod def load_globals(cls): - ht_path = cls._get_table_path(cls.ANNS_HT) + ht_path = cls._get_table_path('annotations.ht') ht_globals = hl.eval(hl.read_table(ht_path).globals.select(*cls.GLOBALS)) cls.LOADED_GLOBALS = {k: ht_globals[k] for k in cls.GLOBALS} diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py index fac3d12d4d..dc99ad8e18 100644 --- a/hail_search/queries/ont_snv_indel.py +++ b/hail_search/queries/ont_snv_indel.py @@ -7,7 +7,6 @@ class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery): DATA_TYPE = 'ONT_SNV_INDEL' - ANNS_HT = 'annotations.ht' CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index 5b42570aeb..a95890e038 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -11,7 +11,6 @@ class SnvIndelHailTableQuery(MitoHailTableQuery): DATA_TYPE = 'SNV_INDEL' - ANNS_HT = 'annotations_vep_110.ht' GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']} QUALITY_FILTER_FORMAT = { diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 3c0a9f2aa5..d43b92cbe6 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -5,7 +5,7 @@ class SnvIndelHailTableQuery37(SnvIndelHailTableQuery): - ANNS_HT = 'annotations.ht' + GENOME_VERSION = GENOME_VERSION_GRCh37 PREDICTION_FIELDS_CONFIG = 
SnvIndelHailTableQuery.PREDICTION_FIELDS_CONFIG_ALL_BUILDS LIFTOVER_ANNOTATION_FIELDS = {} From f6d26b2587157013790200e3e4db88c4a36a9a76 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 15:29:44 -0400 Subject: [PATCH 243/736] adjust annotation order --- .../components/panel/variants/Annotations.jsx | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 4f5b65db72..ce8ff11866 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -632,23 +632,6 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} - {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => ( -
- {`${name} Feature: `} - {variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}} - > - - -
- ))} {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   @@ -673,6 +656,23 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
)} + {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => ( +
+ {`${name} Feature: `} + {variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}} + > + + +
+ ))} {mainTranscript.hgvsc && (
HGVS.C From 8d20abcc16f396479b681b869bc8a7e651eabc3a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 15:35:55 -0400 Subject: [PATCH 244/736] show utrannotator in transcript detail --- ui/shared/components/panel/variants/Transcripts.jsx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 8b8a32c1af..efdb7be9f6 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -40,7 +40,7 @@ AnnotationDetail.propTypes = { } export const ConsequenceDetails = ( - { consequences, variant, idField, idDetails, annotationSections, ensemblLink = {}, ...props }, + { consequences, variant, idField, idDetails, consequenceDetails, annotationSections, ensemblLink = {}, ...props }, ) => (
@@ -52,6 +52,7 @@ export const ConsequenceDetails = ( {c.majorConsequence || c.consequenceTerms.join('; ')} + {consequenceDetails && consequenceDetails(c)} {annotationSections.map(([field1, field2]) => ( @@ -72,6 +73,7 @@ ConsequenceDetails.propTypes = { idField: PropTypes.string.isRequired, variant: PropTypes.object, idDetails: PropTypes.func, + consequenceDetails: PropTypes.func, annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)), ensemblLink: PropTypes.object, } @@ -134,6 +136,13 @@ const transcriptIdDetails = (transcript, variant, { transcriptsById, project, up ) +const transcriptConsequenceDetails = ({ utrannotator }) => utrannotator?.fiveutrConsequence && ( +
+ UTRAnnotator: + {utrannotator.fiveutrConsequence} +
+) + const ANNOTATION_SECTIONS = [ [{ title: 'Codons' }, { title: 'Amino Acids' }], [ @@ -166,6 +175,7 @@ const Transcripts = React.memo(({ variant, genesById, ...props }) => ( variant={variant} idField="transcriptId" idDetails={transcriptIdDetails} + consequenceDetails={transcriptConsequenceDetails} annotationSections={ANNOTATION_SECTIONS} {...props} /> From fc3ca3dbf7c4612a601713df7a014a5adcbeda89 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 15:48:50 -0400 Subject: [PATCH 245/736] do not raise unhandled error on airtable mismatch --- seqr/views/apis/summary_data_api_tests.py | 8 ++++---- seqr/views/utils/airtable_utils.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 96a6f5d580..441bce90db 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -679,10 +679,10 @@ def test_sample_metadata_export(self, mock_google_authenticated): responses.add(responses.GET, '{}/app3Y97xtbbaOopVR/Collaborator'.format(AIRTABLE_URL), json=AIRTABLE_COLLABORATOR_RECORDS, status=200) response = self.client.get(include_airtable_url) - self.assertEqual(response.status_code, 500) - self.assertEqual( - response.json()['error'], - 'Found multiple airtable records for sample NA19675 with mismatched values in field dbgap_study_id') + self.assertEqual(response.status_code, 400) + self.assertListEqual( + response.json()['errors'], + ['Found multiple airtable records for sample NA19675 with mismatched values in field dbgap_study_id']) self.assertEqual(len(responses.calls), 4) first_formula = "OR({CollaboratorSampleID}='NA20885',{CollaboratorSampleID}='NA20888')" expected_fields = [ diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index f6a80f09ff..027e5785ce 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -2,6 +2,7 @@ from collections 
import defaultdict from django.core.exceptions import PermissionDenied +from seqr.utils.middleware import ErrorsWarningsException from seqr.utils.logging_utils import SeqrLogger from seqr.views.utils.terra_api_utils import is_google_authenticated @@ -138,7 +139,7 @@ def get_airtable_samples(sample_ids, user, fields, list_fields=None): if len(record_field) > 1: error = 'Found multiple airtable records for sample {} with mismatched values in field {}'.format( record_id, field) - raise Exception(error) + raise ErrorsWarningsException([error]) if record_field: parsed_record[field] = record_field.pop() for field in list_fields: From 148e8cf80d0599c0552430b397d74cf5d4c2128c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 6 Jun 2024 10:04:11 -0400 Subject: [PATCH 246/736] fix AoU link --- ui/shared/components/panel/variants/Annotations.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 3af864824a..446f60b7eb 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -267,7 +267,7 @@ const VARIANT_LINKS = [ { name: 'AoU', shouldShow: ({ svType }) => !svType, - getHref: ({ chrom, pos, ref, alt }) => `https://databrowser.researchallofus.org/genomic-variants/${chrom}-${pos}-${ref}-${alt}`, + getHref: ({ chrom, pos, ref, alt }) => `https://databrowser.researchallofus.org/variants/${chrom}-${pos}-${ref}-${alt}`, }, { name: 'Iranome', From b14c7ce92920a8595da0e8e230412833f21119f0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 6 Jun 2024 13:35:03 -0400 Subject: [PATCH 247/736] update csqs --- ui/shared/utils/constants.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index cbc44623bb..8068fa491c 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -899,13 
+899,6 @@ const ORDERED_VEP_CONSEQUENCES = [ group: VEP_GROUP_MISSENSE, so: 'SO:0001578', }, - { - description: 'A codon variant that changes at least one base of the first codon of a transcript', - text: 'Initiator codon', - value: 'initiator_codon_variant', - group: VEP_GROUP_MISSENSE, - so: 'SO:0001582', - }, { description: 'A codon variant that changes at least one base of the canonical start codon.', text: 'Start lost', @@ -967,6 +960,13 @@ const ORDERED_VEP_CONSEQUENCES = [ group: VEP_GROUP_SYNONYMOUS, so: 'SO:0001819', }, + { + description: 'A sequence variant where at least one base in the start codon is changed, but the start remains', + text: 'Start retained', + value: 'start_retained_variant', + group: VEP_GROUP_SYNONYMOUS, + so: 'SO:0002019', + }, { description: 'A sequence variant where at least one base in the terminator codon is changed, but the terminator remains', text: 'Stop retained', From bc1f3154dc3a89746a204b2609e7639541cc22e1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 6 Jun 2024 14:07:56 -0400 Subject: [PATCH 248/736] show extended intronic splice reguion --- ui/shared/components/panel/variants/Annotations.jsx | 3 ++- ui/shared/components/panel/variants/Transcripts.jsx | 13 +++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 19daaed793..2cb9f9f759 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -22,7 +22,7 @@ import Modal from '../../modal/Modal' import { ButtonLink, HelpIcon } from '../../StyledComponents' import RnaSeqJunctionOutliersTable from '../../table/RnaSeqJunctionOutliersTable' import { getOtherGeneNames } from '../genes/GeneDetail' -import Transcripts, { ConsequenceDetails } from './Transcripts' +import Transcripts, { ConsequenceDetails, ExtendedSpliceLabel } from './Transcripts' import VariantGenes, { 
GeneLabelContent, omimPhenotypesDetail } from './VariantGene' import { getLocus, @@ -632,6 +632,7 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} + {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index efdb7be9f6..0449243985 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -136,10 +136,19 @@ const transcriptIdDetails = (transcript, variant, { transcriptsById, project, up
) -const transcriptConsequenceDetails = ({ utrannotator }) => utrannotator?.fiveutrConsequence && ( +export const ExtendedSpliceLabel = ({ spliceregion }) => spliceregion?.extended_intronic_splice_region_variant && ( +
+ {annotationGroups.map(groups => ( + + {groups.map(group => annotationGroupDisplay(fieldComponents[ANNOTATION_GROUP_INDEX_MAP[group]]))} + ))} - - )), - !hideOther ? ( - - {fieldComponents[ANNOTATION_GROUP_INDEX_MAP[VEP_GROUP_OTHER]]} - - ) : null, -].filter(fields => fields) +
+ +) const MAX_FREQ_COMPONENTS_PER_ROW = 4 @@ -182,9 +209,27 @@ export const ANNOTATION_PANEL = { name: 'annotations', headerProps: { title: 'Annotations', inputProps: JsonSelectPropsWithAll(ANNOTATION_FILTER_OPTIONS, ALL_ANNOTATION_FILTER_DETAILS) }, fields: ANNOTATION_GROUPS_SPLICE, - fieldProps: { control: AlignedCheckboxGroup, format: val => val || [] }, + fieldProps: { control: AlignedCheckboxGroup, maxOptionsPerColumn: 7, format: val => val || [] }, fieldLayout: annotationFieldLayout([ - SV_GROUPS, HIGH_IMPACT_GROUPS_SPLICE, MODERATE_IMPACT_DISPLAY_GROUPS, CODING_IMPACT_DISPLAY_GROUPS, + [ + VEP_GROUP_NONSENSE, + VEP_GROUP_ESSENTIAL_SPLICE_SITE, + VEP_GROUP_FRAMESHIFT, + VEP_GROUP_MISSENSE, + VEP_GROUP_INFRAME, + ], [ + VEP_GROUP_SYNONYMOUS, + VEP_GROUP_EXTENDED_SPLICE_SITE, + VEP_GROUP_OTHER, + ], + [ + MOTIF_GROUP, + REGULATORY_GROUP, + SCREEN_GROUP, + UTR_ANNOTATOR_GROUP, + SPLICE_AI_FIELD, + ], + SV_GROUPS, ]), helpText: 'Filter by reported annotation. Variants will be returned if they have ANY of the specified annotations, including if they have a Splice AI score above the threshold and no other annotations. 
This filter is overridden by the pathogenicity filter, so variants will be returned if they have the specified pathogenicity even if none of the annotation filters match.', } diff --git a/ui/shared/components/panel/search/constants.js b/ui/shared/components/panel/search/constants.js index f4ee27a125..d1e42366f7 100644 --- a/ui/shared/components/panel/search/constants.js +++ b/ui/shared/components/panel/search/constants.js @@ -245,13 +245,13 @@ export const ANNOTATION_GROUPS = Object.entries(GROUPED_VEP_CONSEQUENCES).map(([ name, options, groupLabel: snakecaseToTitlecase(name), })) -const SCREEN_GROUP = 'SCREEN' +export const SCREEN_GROUP = 'SCREEN' const SCREEN_VALUES = ['PLS', 'pELS', 'dELS', 'DNase-H3K4me3', 'CTCF-only', 'DNase-only', 'low-DNase'] -const UTR_ANNOTATOR_GROUP = 'UTRAnnotator' +export const UTR_ANNOTATOR_GROUP = 'UTRAnnotator' const UTR_ANNOTATOR_VALUES = [ 'premature_start_codon_gain', 'premature_start_codon_loss', 'stop_codon_gain', 'stop_codon_loss', 'uORF_frameshift', ] -const MOTIF_GROUP = 'motif_feature' +export const MOTIF_GROUP = 'motif_feature' const MOTIF_VALUES = [ { description: 'A feature ablation whereby the deleted region includes a transcription factor binding site', @@ -282,7 +282,7 @@ const MOTIF_VALUES = [ value: 'TFBS_translocation', }, ] -const REGULATORY_GROUP = 'regulatory_feature' +export const REGULATORY_GROUP = 'regulatory_feature' const REGULATORY_VALUES = [ { description: 'A sequence variant located within a regulatory region', From 9ad37145842871a5051ad8283a6739945ae1d94a Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Fri, 21 Jun 2024 14:58:21 +0000 Subject: [PATCH 332/736] fix: requirements.txt to reduce vulnerabilities The following vulnerabilities are fixed by pinning transitive dependencies: - https://snyk.io/vuln/SNYK-PYTHON-REQUESTS-6928867 - https://snyk.io/vuln/SNYK-PYTHON-URLLIB3-7267250 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt 
index ca943f941f..8e7ef47708 100644 --- a/requirements.txt +++ b/requirements.txt @@ -175,7 +175,7 @@ tenacity==8.3.0 # via -r requirements.in tqdm==4.66.3 # via -r requirements.in -urllib3==1.26.18 +urllib3==1.26.19 # via # elasticsearch # requests From 52b1235f2775d7d88b9c53435a62fa412247a816 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Fri, 21 Jun 2024 15:00:18 +0000 Subject: [PATCH 333/736] fix: requirements-dev.txt to reduce vulnerabilities The following vulnerabilities are fixed by pinning transitive dependencies: - https://snyk.io/vuln/SNYK-PYTHON-REQUESTS-6928867 - https://snyk.io/vuln/SNYK-PYTHON-URLLIB3-7267250 --- requirements-dev.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index faa3c538b3..38ec6ac2fc 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -53,7 +53,7 @@ pytz==2022.7.1 # django rcssmin==1.1.1 # via django-compressor -requests==2.32.0 +requests==2.32.2 # via # -c requirements.txt # responses @@ -74,7 +74,7 @@ tomli==2.0.1 # pyproject-hooks types-toml==0.10.8.5 # via responses -urllib3==1.26.18 +urllib3==1.26.19 # via # -c requirements.txt # requests From 82a720960532601f49256663577e0760ade4b0f9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 21 Jun 2024 11:16:08 -0400 Subject: [PATCH 334/736] tweak layout for smaller screen --- .../panel/search/VariantSearchFormPanels.jsx | 44 +++++++------------ 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/ui/shared/components/panel/search/VariantSearchFormPanels.jsx b/ui/shared/components/panel/search/VariantSearchFormPanels.jsx index 8b7afb75e2..f23b72cfdf 100644 --- a/ui/shared/components/panel/search/VariantSearchFormPanels.jsx +++ b/ui/shared/components/panel/search/VariantSearchFormPanels.jsx @@ -150,24 +150,6 @@ export const inSilicoFieldLayout = groups => ([requireComponent, ...fieldCompone ) -// export const annotationFieldLayout = (annotationGroups, hideOther) => fieldComponents => [ -// 
...annotationGroups.map(groups => ( -// -// {groups.map(group => ( -// -// {fieldComponents[ANNOTATION_GROUP_INDEX_MAP[group]]} -// -// -// ))} -// -// )), -// !hideOther ? ( -// -// {fieldComponents[ANNOTATION_GROUP_INDEX_MAP[VEP_GROUP_OTHER]]} -// -// ) : null, -// ].filter(fields => fields) - const annotationColSpan = ({ maxOptionsPerColumn, options = [] }) => Math.ceil(options.length / maxOptionsPerColumn) const annotationGroupDisplay = component => ( @@ -176,13 +158,15 @@ const annotationGroupDisplay = component => ( export const annotationFieldLayout = annotationGroups => fieldComponents => ( - - {annotationGroups.map(groups => ( - - {groups.map(group => annotationGroupDisplay(fieldComponents[ANNOTATION_GROUP_INDEX_MAP[group]]))} - - ))} -
+ + + {annotationGroups.map(groups => ( + + {groups.map(group => annotationGroupDisplay(fieldComponents[ANNOTATION_GROUP_INDEX_MAP[group]]))} + + ))} +
+
) @@ -223,14 +207,15 @@ export const ANNOTATION_PANEL = { VEP_GROUP_OTHER, ], [ + SPLICE_AI_FIELD, MOTIF_GROUP, REGULATORY_GROUP, SCREEN_GROUP, UTR_ANNOTATOR_GROUP, - SPLICE_AI_FIELD, ], SV_GROUPS, ]), + noPadding: true, helpText: 'Filter by reported annotation. Variants will be returned if they have ANY of the specified annotations, including if they have a Splice AI score above the threshold and no other annotations. This filter is overridden by the pathogenicity filter, so variants will be returned if they have the specified pathogenicity even if none of the annotation filters match.', } @@ -309,7 +294,7 @@ const formatField = (field, name, esEnabled, { formatNoEsLabel, ...fieldProps }) label: (!esEnabled && formatNoEsLabel) ? formatNoEsLabel(field.label) : field.label, }) -const PanelContent = React.memo(({ name, fields, fieldProps, helpText, fieldLayout, esEnabled }) => { +const PanelContent = React.memo(({ name, fields, fieldProps, helpText, fieldLayout, esEnabled, noPadding }) => { const fieldComponents = fields && configuredFields( { fields: fields.map(field => formatField(field, name, esEnabled, fieldProps || {})) }, ) @@ -322,9 +307,9 @@ const PanelContent = React.memo(({ name, fields, fieldProps, helpText, fieldLayo )} - + {!noPadding && } {fieldLayout ? fieldLayout(fieldComponents) : fieldComponents} - + {!noPadding && }
) @@ -337,6 +322,7 @@ PanelContent.propTypes = { helpText: PropTypes.node, fieldLayout: PropTypes.func, esEnabled: PropTypes.bool, + noPadding: PropTypes.bool, } class VariantSearchFormPanels extends React.PureComponent { From 49370f6130610a2846cc11cfb41255404a97d3f1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 21 Jun 2024 11:36:34 -0400 Subject: [PATCH 335/736] clean up imports --- .../components/VariantSearchFormContent.jsx | 11 +++-- .../panel/search/VariantSearchFormPanels.jsx | 41 +++---------------- .../components/panel/search/constants.js | 27 +++++++----- 3 files changed, 26 insertions(+), 53 deletions(-) diff --git a/ui/pages/Search/components/VariantSearchFormContent.jsx b/ui/pages/Search/components/VariantSearchFormContent.jsx index 6a8bfc2aa6..281a67510b 100644 --- a/ui/pages/Search/components/VariantSearchFormContent.jsx +++ b/ui/pages/Search/components/VariantSearchFormContent.jsx @@ -15,10 +15,9 @@ import VariantSearchFormPanels, { annotationFieldLayout, inSilicoFieldLayout, JsonSelectPropsWithAll, } from 'shared/components/panel/search/VariantSearchFormPanels' import { - HIGH_IMPACT_GROUPS_SPLICE, HIGH_IMPACT_GROUPS, MODERATE_IMPACT_GROUPS, CODING_IMPACT_GROUPS, ANY_PATHOGENICITY_FILTER, + HIGH_MODERATE_IMPACT_GROUPS, CODING_OTHER_IMPACT_GROUPS, ANNOTATION_OVERRIDE_GROUPS, ANY_PATHOGENICITY_FILTER, SV_GROUPS, SNP_FREQUENCIES, SNP_QUALITY_FILTER_FIELDS, PATHOGENICITY_FIELDS, PATHOGENICITY_FILTER_OPTIONS, - MITO_FREQUENCIES, MITO_QUALITY_FILTER_FIELDS, SV_FREQUENCIES, SV_QUALITY_FILTER_FIELDS, CODING_IMPACT_DISPLAY_GROUPS, - MODERATE_IMPACT_DISPLAY_GROUPS, + MITO_FREQUENCIES, MITO_QUALITY_FILTER_FIELDS, SV_FREQUENCIES, SV_QUALITY_FILTER_FIELDS, } from 'shared/components/panel/search/constants' import { ALL_INHERITANCE_FILTER, DATASET_TYPE_SNV_INDEL_CALLS, DATASET_TYPE_SV_CALLS, NO_SV_IN_SILICO_GROUPS, VEP_GROUP_SV_NEW, @@ -154,7 +153,7 @@ const ANNOTATION_SECONDARY_PANEL = { ), fieldLayout: annotationFieldLayout( - [SV_GROUPS_NO_NEW, 
HIGH_IMPACT_GROUPS, MODERATE_IMPACT_GROUPS, CODING_IMPACT_GROUPS], + [HIGH_MODERATE_IMPACT_GROUPS, CODING_OTHER_IMPACT_GROUPS, SV_GROUPS_NO_NEW], ), } @@ -173,11 +172,11 @@ const DATASET_TYPE_PANEL_PROPS = { [DATASET_TYPE_SNV_INDEL_CALLS]: { [ANNOTATION_PANEL.name]: { fieldLayout: annotationFieldLayout( - [HIGH_IMPACT_GROUPS_SPLICE, MODERATE_IMPACT_DISPLAY_GROUPS, CODING_IMPACT_DISPLAY_GROUPS], + [HIGH_MODERATE_IMPACT_GROUPS, CODING_OTHER_IMPACT_GROUPS, ANNOTATION_OVERRIDE_GROUPS], ), }, [ANNOTATION_SECONDARY_NAME]: { - fieldLayout: annotationFieldLayout([HIGH_IMPACT_GROUPS, MODERATE_IMPACT_GROUPS, CODING_IMPACT_GROUPS]), + fieldLayout: annotationFieldLayout([HIGH_MODERATE_IMPACT_GROUPS, CODING_OTHER_IMPACT_GROUPS]), }, [IN_SILICO_PANEL.name]: { fieldLayout: inSilicoFieldLayout(NO_SV_IN_SILICO_GROUPS), diff --git a/ui/shared/components/panel/search/VariantSearchFormPanels.jsx b/ui/shared/components/panel/search/VariantSearchFormPanels.jsx index f23b72cfdf..c5a088e952 100644 --- a/ui/shared/components/panel/search/VariantSearchFormPanels.jsx +++ b/ui/shared/components/panel/search/VariantSearchFormPanels.jsx @@ -7,16 +7,7 @@ import { VerticalSpacer } from 'shared/components/Spacers' import { ButtonLink } from 'shared/components/StyledComponents' import { Select, AlignedCheckboxGroup } from 'shared/components/form/Inputs' import { configuredField, configuredFields } from 'shared/components/form/FormHelpers' -import { - VEP_GROUP_OTHER, SPLICE_AI_FIELD, SV_IN_SILICO_GROUP, NO_SV_IN_SILICO_GROUPS, - VEP_GROUP_NONSENSE, - VEP_GROUP_ESSENTIAL_SPLICE_SITE, - VEP_GROUP_FRAMESHIFT, - VEP_GROUP_MISSENSE, - VEP_GROUP_INFRAME, - VEP_GROUP_SYNONYMOUS, - VEP_GROUP_EXTENDED_SPLICE_SITE, -} from 'shared/utils/constants' +import { SPLICE_AI_FIELD, SV_IN_SILICO_GROUP, NO_SV_IN_SILICO_GROUPS } from 'shared/utils/constants' import { FrequencyFilter, HeaderFrequencyFilter } from './FrequencyFilter' import { @@ -32,15 +23,11 @@ import { QUALITY_FILTER_OPTIONS, ALL_QUALITY_FILTER, 
LOCATION_FIELDS, - CODING_IMPACT_DISPLAY_GROUPS, - HIGH_IMPACT_GROUPS_SPLICE, - MODERATE_IMPACT_DISPLAY_GROUPS, + CODING_OTHER_IMPACT_GROUPS, + HIGH_MODERATE_IMPACT_GROUPS, + ANNOTATION_OVERRIDE_GROUPS, SV_GROUPS, LOCUS_FIELD_NAME, - MOTIF_GROUP, - REGULATORY_GROUP, - SCREEN_GROUP, - UTR_ANNOTATOR_GROUP, } from './constants' const LabeledSlider = React.lazy(() => import('./LabeledSlider')) @@ -195,25 +182,7 @@ export const ANNOTATION_PANEL = { fields: ANNOTATION_GROUPS_SPLICE, fieldProps: { control: AlignedCheckboxGroup, maxOptionsPerColumn: 7, format: val => val || [] }, fieldLayout: annotationFieldLayout([ - [ - VEP_GROUP_NONSENSE, - VEP_GROUP_ESSENTIAL_SPLICE_SITE, - VEP_GROUP_FRAMESHIFT, - VEP_GROUP_MISSENSE, - VEP_GROUP_INFRAME, - ], [ - VEP_GROUP_SYNONYMOUS, - VEP_GROUP_EXTENDED_SPLICE_SITE, - VEP_GROUP_OTHER, - ], - [ - SPLICE_AI_FIELD, - MOTIF_GROUP, - REGULATORY_GROUP, - SCREEN_GROUP, - UTR_ANNOTATOR_GROUP, - ], - SV_GROUPS, + HIGH_MODERATE_IMPACT_GROUPS, CODING_OTHER_IMPACT_GROUPS, ANNOTATION_OVERRIDE_GROUPS, SV_GROUPS, ]), noPadding: true, helpText: 'Filter by reported annotation. Variants will be returned if they have ANY of the specified annotations, including if they have a Splice AI score above the threshold and no other annotations. 
This filter is overridden by the pathogenicity filter, so variants will be returned if they have the specified pathogenicity even if none of the annotation filters match.', diff --git a/ui/shared/components/panel/search/constants.js b/ui/shared/components/panel/search/constants.js index d1e42366f7..2790d4d703 100644 --- a/ui/shared/components/panel/search/constants.js +++ b/ui/shared/components/panel/search/constants.js @@ -333,7 +333,7 @@ ANNOTATION_GROUPS.push({ options: REGULATORY_VALUES, }) -export const ALL_IMPACT_GROUPS = [ +const ALL_IMPACT_GROUPS = [ VEP_GROUP_NONSENSE, VEP_GROUP_ESSENTIAL_SPLICE_SITE, VEP_GROUP_EXTENDED_SPLICE_SITE, @@ -345,27 +345,32 @@ export const ALL_IMPACT_GROUPS = [ VEP_GROUP_SV, VEP_GROUP_SV_CONSEQUENCES, ] -export const HIGH_IMPACT_GROUPS = [ +const HIGH_IMPACT_GROUPS = [ VEP_GROUP_NONSENSE, VEP_GROUP_ESSENTIAL_SPLICE_SITE, VEP_GROUP_FRAMESHIFT, ] -export const HIGH_IMPACT_GROUPS_SPLICE = [ - ...HIGH_IMPACT_GROUPS, +export const ANNOTATION_OVERRIDE_GROUPS = [ + SPLICE_AI_FIELD, MOTIF_GROUP, REGULATORY_GROUP, - SPLICE_AI_FIELD, + SCREEN_GROUP, + UTR_ANNOTATOR_GROUP, ] -export const MODERATE_IMPACT_GROUPS = [ +export const HIGH_MODERATE_IMPACT_GROUPS = [ + ...HIGH_IMPACT_GROUPS, VEP_GROUP_MISSENSE, VEP_GROUP_INFRAME, ] -export const MODERATE_IMPACT_DISPLAY_GROUPS = [...MODERATE_IMPACT_GROUPS, SCREEN_GROUP] -export const CODING_IMPACT_GROUPS = [ +const CODING_IMPACT_GROUPS = [ VEP_GROUP_SYNONYMOUS, VEP_GROUP_EXTENDED_SPLICE_SITE, ] -export const CODING_IMPACT_DISPLAY_GROUPS = [...CODING_IMPACT_GROUPS, UTR_ANNOTATOR_GROUP] +export const CODING_OTHER_IMPACT_GROUPS = [ + ...CODING_IMPACT_GROUPS, + VEP_GROUP_OTHER, +] + export const ALL_ANNOTATION_FILTER = { text: 'All', vepGroups: ALL_IMPACT_GROUPS, @@ -379,11 +384,11 @@ export const ANNOTATION_FILTER_OPTIONS = [ }, { text: 'Moderate to High Impact', - vepGroups: HIGH_IMPACT_GROUPS.concat(MODERATE_IMPACT_GROUPS), + vepGroups: HIGH_MODERATE_IMPACT_GROUPS, }, { text: 'All rare coding 
variants', - vepGroups: HIGH_IMPACT_GROUPS.concat(MODERATE_IMPACT_GROUPS).concat(CODING_IMPACT_GROUPS), + vepGroups: HIGH_MODERATE_IMPACT_GROUPS.concat(CODING_IMPACT_GROUPS), }, ].map(({ vepGroups, ...option }) => ({ ...option, From 73c1110b77ed71025e33dc529c4ab8852df41b0c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 21 Jun 2024 11:43:02 -0400 Subject: [PATCH 336/736] fix table display --- .../panel/search/VariantSearchFormPanels.jsx | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/ui/shared/components/panel/search/VariantSearchFormPanels.jsx b/ui/shared/components/panel/search/VariantSearchFormPanels.jsx index c5a088e952..619676072d 100644 --- a/ui/shared/components/panel/search/VariantSearchFormPanels.jsx +++ b/ui/shared/components/panel/search/VariantSearchFormPanels.jsx @@ -88,6 +88,11 @@ const ExpandCollapseCategoryContainer = styled.span` top: -2em; ` +const CenteredTable = styled(Table)` + margin-left: auto !important; + margin-right: auto !important; +` + const LazyLabeledSlider = props => }> export const JsonSelectPropsWithAll = (options, all) => ({ @@ -145,15 +150,13 @@ const annotationGroupDisplay = component => ( export const annotationFieldLayout = annotationGroups => fieldComponents => ( - - - {annotationGroups.map(groups => ( - - {groups.map(group => annotationGroupDisplay(fieldComponents[ANNOTATION_GROUP_INDEX_MAP[group]]))} - - ))} -
-
+ + {annotationGroups.map(groups => ( + + {groups.map(group => annotationGroupDisplay(fieldComponents[ANNOTATION_GROUP_INDEX_MAP[group]]))} + + ))} +
) From 5c268fd66c5e5cb6af24c8f24822acbc55619e3d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 21 Jun 2024 11:49:19 -0400 Subject: [PATCH 337/736] remove unused exports --- ui/shared/components/panel/search/constants.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ui/shared/components/panel/search/constants.js b/ui/shared/components/panel/search/constants.js index 2790d4d703..cd3ed3cafe 100644 --- a/ui/shared/components/panel/search/constants.js +++ b/ui/shared/components/panel/search/constants.js @@ -245,13 +245,13 @@ export const ANNOTATION_GROUPS = Object.entries(GROUPED_VEP_CONSEQUENCES).map(([ name, options, groupLabel: snakecaseToTitlecase(name), })) -export const SCREEN_GROUP = 'SCREEN' +const SCREEN_GROUP = 'SCREEN' const SCREEN_VALUES = ['PLS', 'pELS', 'dELS', 'DNase-H3K4me3', 'CTCF-only', 'DNase-only', 'low-DNase'] -export const UTR_ANNOTATOR_GROUP = 'UTRAnnotator' +const UTR_ANNOTATOR_GROUP = 'UTRAnnotator' const UTR_ANNOTATOR_VALUES = [ 'premature_start_codon_gain', 'premature_start_codon_loss', 'stop_codon_gain', 'stop_codon_loss', 'uORF_frameshift', ] -export const MOTIF_GROUP = 'motif_feature' +const MOTIF_GROUP = 'motif_feature' const MOTIF_VALUES = [ { description: 'A feature ablation whereby the deleted region includes a transcription factor binding site', @@ -282,7 +282,7 @@ const MOTIF_VALUES = [ value: 'TFBS_translocation', }, ] -export const REGULATORY_GROUP = 'regulatory_feature' +const REGULATORY_GROUP = 'regulatory_feature' const REGULATORY_VALUES = [ { description: 'A sequence variant located within a regulatory region', From d6dfb8f1cb32eb1e545191747024457301f25e96 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 21 Jun 2024 12:58:02 -0400 Subject: [PATCH 338/736] additional values and post processing --- seqr/views/apis/family_api.py | 34 ++++++++++++++------------- seqr/views/utils/orm_to_json_utils.py | 5 ++-- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git 
a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 7182cb06b9..158957f70e 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -5,7 +5,7 @@ from collections import defaultdict from django.contrib.auth.models import User from django.contrib.postgres.aggregates import ArrayAgg -from django.db.models import Count, Q +from django.db.models import Count, Q, Case, When, Value, Exists, OuterRef from django.db.models.fields.files import ImageFieldFile from django.db.models.functions import JSONObject, Concat, Upper, Substr @@ -43,23 +43,25 @@ def family_page_data(request, family_guid): has_case_review_perm = has_case_review_permissions(project, request.user) sample_models = Sample.objects.filter(individual__family=family) - samples = get_json_for_samples(sample_models, project_guid=project.guid, family_guid=family_guid, skip_nested=True, is_analyst=is_analyst) - response = { - 'samplesByGuid': {s['sampleGuid']: {**s, 'rnaSeqTypes': []} for s in samples}, + additional_values = { + 'rnaSeqTpm': Case(When(Exists(RnaSeqTpm.objects.filter(sample_id=OuterRef('pk'))), then=Value('TPM')), default=None), + 'rnaSeqOutlier': Case(When(Exists(RnaSeqOutlier.objects.filter(sample_id=OuterRef('pk'))), then=Value('Outlier')), default=None), + 'rnaSeqSpliceOutlier': Case(When(Exists(RnaSeqSpliceOutlier.objects.filter(sample_id=OuterRef('pk'))), then=Value('Splice Outlier')), default=None), } + samples = get_json_for_samples( + sample_models, project_guid=project.guid, family_guid=family_guid, skip_nested=True, is_analyst=is_analyst, + additional_values=additional_values + ) + samples_by_guid = {} + for sample in samples: + tpm, outlier, splice_outlier = sample.pop('rnaSeqTpm'), sample.pop('rnaSeqOutlier'), sample.pop('rnaSeqSpliceOutlier') + if sample['sampleType'] == 'RNA': + sample['rnaSeqTypes'] = [value for value in [tpm, outlier, splice_outlier] if value] + samples_by_guid[sample['sampleGuid']] = sample - # Add Rna Seq metadata to 
samples - rna_seq_models = [ - (RnaSeqTpm, 'TPM'), - (RnaSeqOutlier, 'Outlier'), - (RnaSeqSpliceOutlier, 'Splice Outlier'), - ] - for model, rna_seq_type in rna_seq_models: - rna_seq_samples = model.objects.filter( - sample__in=sample_models, sample__is_active=True - ).values('sample__guid').distinct() - for rna_seq_sample in rna_seq_samples: - response['samplesByGuid'][rna_seq_sample['sample__guid']]['rnaSeqTypes'].append(rna_seq_type) + response = { + 'samplesByGuid': {s['sampleGuid']: s for s in samples}, + } add_families_context(response, families, project.guid, request.user, is_analyst, has_case_review_perm) family_response = response['familiesByGuid'][family_guid] diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 67a943fc8f..8939c99d4b 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -346,16 +346,17 @@ def _get_sample_json_kwargs(project_guid=None, family_guid=None, individual_guid return {'guid_key': 'sampleGuid', **additional_kwargs, **kwargs} -def get_json_for_samples(samples, **kwargs): +def get_json_for_samples(samples, additional_values=None, **kwargs): """Returns a JSON representation of the given list of Samples. Args: samples (array): array of django models for the Samples. 
+ additional_values (dict): additional values to include in the json Returns: array: array of json objects """ - return get_json_for_queryset(samples, **_get_sample_json_kwargs(**kwargs)) + return get_json_for_queryset(samples, additional_values=additional_values, **_get_sample_json_kwargs(**kwargs)) def get_json_for_sample(sample, **kwargs): From 88c86c5622952a43f0f133aa165ade5e866ad6fd Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 21 Jun 2024 14:04:48 -0400 Subject: [PATCH 339/736] extra --- seqr/views/apis/family_api.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 158957f70e..8711601d00 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -4,8 +4,9 @@ import json from collections import defaultdict from django.contrib.auth.models import User -from django.contrib.postgres.aggregates import ArrayAgg -from django.db.models import Count, Q, Case, When, Value, Exists, OuterRef +from django.contrib.postgres.aggregates import ArrayAgg, JSONBAgg, StringAgg +from django.contrib.postgres.fields import ArrayField +from django.db.models import Count, Q, Case, When, Value, CharField, F, Exists, OuterRef from django.db.models.fields.files import ImageFieldFile from django.db.models.functions import JSONObject, Concat, Upper, Substr @@ -52,12 +53,10 @@ def family_page_data(request, family_guid): sample_models, project_guid=project.guid, family_guid=family_guid, skip_nested=True, is_analyst=is_analyst, additional_values=additional_values ) - samples_by_guid = {} for sample in samples: tpm, outlier, splice_outlier = sample.pop('rnaSeqTpm'), sample.pop('rnaSeqOutlier'), sample.pop('rnaSeqSpliceOutlier') if sample['sampleType'] == 'RNA': sample['rnaSeqTypes'] = [value for value in [tpm, outlier, splice_outlier] if value] - samples_by_guid[sample['sampleGuid']] = sample response = { 'samplesByGuid': {s['sampleGuid']: s for s in samples}, From 
c73e3f684999cff53c6d54298958f4ce4c1a6c38 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 21 Jun 2024 14:20:56 -0400 Subject: [PATCH 340/736] test --- seqr/views/apis/family_api_tests.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index eb22e8e85d..1664d72da6 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -31,6 +31,7 @@ INDIVIDUAL3_GUID = 'I000003_na19679' INDIVIDUAL_GUIDS = [INDIVIDUAL_GUID, INDIVIDUAL2_GUID, INDIVIDUAL3_GUID] +SAMPLE_GUIDS = ['S000129_na19675', 'S000130_na19678', 'S000131_na19679', 'S000151_na19675_1', 'S000152_na19675_d2', 'S000153_na19679'] class FamilyAPITest(AuthenticationTestCase): fixtures = ['users', '1kg_project', 'reference_data'] @@ -69,8 +70,7 @@ def test_family_page_data(self): self.assertEqual(len(response_json['individualsByGuid']), 3) individual = response_json['individualsByGuid'][INDIVIDUAL_GUID] - individual_fields = {'sampleGuids', 'igvSampleGuids', 'mmeSubmissionGuid', 'hasRnaOutlierData', - 'phenotypePrioritizationTools'} + individual_fields = {'sampleGuids', 'igvSampleGuids', 'mmeSubmissionGuid', 'phenotypePrioritizationTools'} individual_fields.update(INDIVIDUAL_FIELDS) self.assertSetEqual(set(individual.keys()), individual_fields) self.assertListEqual([ @@ -83,10 +83,6 @@ def test_family_page_data(self): ], [response_json['individualsByGuid'][guid].get('phenotypePrioritizationTools') for guid in INDIVIDUAL_GUIDS] ) - self.assertListEqual( - [True, False, True], - [response_json['individualsByGuid'][guid].get('hasRnaOutlierData', False) for guid in INDIVIDUAL_GUIDS] - ) self.assertSetEqual({PROJECT_GUID}, {i['projectGuid'] for i in response_json['individualsByGuid'].values()}) self.assertSetEqual({FAMILY_GUID}, {i['familyGuid'] for i in response_json['individualsByGuid'].values()}) @@ -96,6 +92,10 @@ def test_family_page_data(self): 
self.assertSetEqual({FAMILY_GUID}, {s['familyGuid'] for s in response_json['samplesByGuid'].values()}) self.assertEqual(len(individual['sampleGuids']), 3) self.assertTrue(set(individual['sampleGuids']).issubset(set(response_json['samplesByGuid'].keys()))) + self.assertListEqual( + [[], [], [], ['TPM', 'Splice Outlier'], ['TPM', 'Outlier', 'Splice Outlier'], ['Splice Outlier']], + [response_json['samplesByGuid'][guid].get('rnaSeqTypes', []) for guid in SAMPLE_GUIDS] + ) self.assertEqual(len(response_json['igvSamplesByGuid']), 1) self.assertSetEqual(set(next(iter(response_json['igvSamplesByGuid'].values())).keys()), IGV_SAMPLE_FIELDS) From a539726c4b8a49f3e8caea78819e5b5940c81462 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 21 Jun 2024 14:26:18 -0400 Subject: [PATCH 341/736] unused --- seqr/views/apis/family_api.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 8711601d00..5fe95572ac 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -4,9 +4,8 @@ import json from collections import defaultdict from django.contrib.auth.models import User -from django.contrib.postgres.aggregates import ArrayAgg, JSONBAgg, StringAgg -from django.contrib.postgres.fields import ArrayField -from django.db.models import Count, Q, Case, When, Value, CharField, F, Exists, OuterRef +from django.contrib.postgres.aggregates import ArrayAgg +from django.db.models import Count, Q, Case, When, Value, Exists, OuterRef from django.db.models.fields.files import ImageFieldFile from django.db.models.functions import JSONObject, Concat, Upper, Substr From 5eac9a1c2465ddd914078008888e07ac9c1452a0 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 24 Jun 2024 10:45:32 -0400 Subject: [PATCH 342/736] add litvar2 link --- ui/shared/components/panel/variants/Annotations.jsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git 
a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index f44e860249..5581233aca 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -298,6 +298,13 @@ const VARIANT_LINKS = [ `https://aggregator.bchresearch.org/variant.html?variant=${chrom}:${genomeVersion === GENOME_VERSION_37 ? pos : liftedOverPos}:${ref}:${alt}` ), }, + { + name: 'LitVar2', + shouldShow: ({ CAID, rsid }) => !!CAID && !!rsid, + getHref: ({ CAID, rsid }) => ( + `https://ncbi.nlm.nih.gov/research/litvar2/docsum?variant=litvar@${CAID}%23${rsid}%23%23&query=${CAID}` + ), + }, ] const getSampleType = (genotypes) => { From a888a018f732c8f3f6d06208b50d183b38b90415 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 24 Jun 2024 13:56:56 -0400 Subject: [PATCH 343/736] remove unannotated consequences --- ui/shared/utils/constants.js | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 830b856aca..6b0efdae08 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -920,12 +920,6 @@ const ORDERED_VEP_CONSEQUENCES = [ group: VEP_GROUP_INFRAME, so: 'SO:0001822', }, - { - description: 'A feature amplification of a region containing a transcript', - text: 'Transcript amplification', - value: 'transcript_amplification', - so: 'SO:0001889', - }, { description: 'A sequence_variant which is predicted to change the protein encoded in the coding sequence', text: 'Protein Altering', @@ -1060,18 +1054,6 @@ const ORDERED_VEP_CONSEQUENCES = [ value: 'coding_transcript_variant', so: 'SO:0001968', }, - { - description: 'A sequence variant that causes the extension of a genomic feature, with regard to the reference sequence', - text: 'Feature elongation', - value: 'feature_elongation', - so: 'SO:0001907', - }, - { - description: 'A sequence variant that causes the reduction of a genomic 
feature, with regard to the reference sequence', - text: 'Feature truncation', - value: 'feature_truncation', - so: 'SO:0001906', - }, { description: 'A sequence variant located in the intergenic region, between genes', text: 'Intergenic variant', From e2d52f1276a3fba40493e5bafb1ebd78a4bd1762 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 24 Jun 2024 13:57:22 -0400 Subject: [PATCH 344/736] with isnull --- seqr/views/apis/family_api.py | 13 +++++-------- seqr/views/apis/family_api_tests.py | 12 +++++++++--- seqr/views/utils/test_utils.py | 2 +- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 5fe95572ac..50b294dc88 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -44,19 +44,16 @@ def family_page_data(request, family_guid): sample_models = Sample.objects.filter(individual__family=family) additional_values = { - 'rnaSeqTpm': Case(When(Exists(RnaSeqTpm.objects.filter(sample_id=OuterRef('pk'))), then=Value('TPM')), default=None), - 'rnaSeqOutlier': Case(When(Exists(RnaSeqOutlier.objects.filter(sample_id=OuterRef('pk'))), then=Value('Outlier')), default=None), - 'rnaSeqSpliceOutlier': Case(When(Exists(RnaSeqSpliceOutlier.objects.filter(sample_id=OuterRef('pk'))), then=Value('Splice Outlier')), default=None), + 'rnaSeqTypes': JSONObject( + hasRnaSeqTpm=Case(When(rnaseqtpm__isnull=False, then=True), default=False), + hasRnaSeqOutlier=Case(When(rnaseqoutlier__isnull=False, then=True), default=False), + hasRnaSeqSpliceOutlier=Case(When(rnaseqspliceoutlier__isnull=False, then=True), default=False), + ) } samples = get_json_for_samples( sample_models, project_guid=project.guid, family_guid=family_guid, skip_nested=True, is_analyst=is_analyst, additional_values=additional_values ) - for sample in samples: - tpm, outlier, splice_outlier = sample.pop('rnaSeqTpm'), sample.pop('rnaSeqOutlier'), sample.pop('rnaSeqSpliceOutlier') - if sample['sampleType'] == 
'RNA': - sample['rnaSeqTypes'] = [value for value in [tpm, outlier, splice_outlier] if value] - response = { 'samplesByGuid': {s['sampleGuid']: s for s in samples}, } diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 1664d72da6..d585018b1c 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -92,9 +92,15 @@ def test_family_page_data(self): self.assertSetEqual({FAMILY_GUID}, {s['familyGuid'] for s in response_json['samplesByGuid'].values()}) self.assertEqual(len(individual['sampleGuids']), 3) self.assertTrue(set(individual['sampleGuids']).issubset(set(response_json['samplesByGuid'].keys()))) - self.assertListEqual( - [[], [], [], ['TPM', 'Splice Outlier'], ['TPM', 'Outlier', 'Splice Outlier'], ['Splice Outlier']], - [response_json['samplesByGuid'][guid].get('rnaSeqTypes', []) for guid in SAMPLE_GUIDS] + expected_rna_seq_types = [ + {'hasRnaSeqTpm': False, 'hasRnaSeqOutlier': False, 'hasRnaSeqSpliceOutlier': False} for _ in range(3) + ] + [ + {'hasRnaSeqTpm': True, 'hasRnaSeqOutlier': False, 'hasRnaSeqSpliceOutlier': True}, + {'hasRnaSeqTpm': True, 'hasRnaSeqOutlier': True, 'hasRnaSeqSpliceOutlier': True}, + {'hasRnaSeqTpm': False, 'hasRnaSeqOutlier': False, 'hasRnaSeqSpliceOutlier': True}, + ] + self.assertListEqual(expected_rna_seq_types, + [response_json['samplesByGuid'][guid]['rnaSeqTypes'] for guid in SAMPLE_GUIDS] ) self.assertEqual(len(response_json['igvSamplesByGuid']), 1) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index f0ed234d4f..bb0d509835 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -793,7 +793,7 @@ def _get_list_param(call, param): SAMPLE_FIELDS = { 'projectGuid', 'familyGuid', 'individualGuid', 'sampleGuid', 'createdDate', 'sampleType', 'sampleId', 'isActive', - 'loadedDate', 'datasetType', 'elasticsearchIndex', + 'loadedDate', 'datasetType', 'elasticsearchIndex', 'rnaSeqTypes' } IGV_SAMPLE_FIELDS 
= { From 6268b1041b24107a25359bbd169f5ad8bc6b49a1 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 24 Jun 2024 13:58:04 -0400 Subject: [PATCH 345/736] with outerref --- seqr/views/apis/family_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 50b294dc88..1987a849a8 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -45,9 +45,9 @@ def family_page_data(request, family_guid): sample_models = Sample.objects.filter(individual__family=family) additional_values = { 'rnaSeqTypes': JSONObject( - hasRnaSeqTpm=Case(When(rnaseqtpm__isnull=False, then=True), default=False), - hasRnaSeqOutlier=Case(When(rnaseqoutlier__isnull=False, then=True), default=False), - hasRnaSeqSpliceOutlier=Case(When(rnaseqspliceoutlier__isnull=False, then=True), default=False), + hasRnaSeqTpm=Case(When(Exists(RnaSeqTpm.objects.filter(sample_id=OuterRef('pk'))), then=True), default=False), + hasRnaSeqOutlier=Case(When(Exists(RnaSeqOutlier.objects.filter(sample_id=OuterRef('pk'))), then=True), default=False), + hasRnaSeqSpliceOutlier=Case(When(Exists(RnaSeqSpliceOutlier.objects.filter(sample_id=OuterRef('pk'))), then=True), default=False), ) } samples = get_json_for_samples( From c270b4312698a139101948a0286a5ebe83304e5b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 24 Jun 2024 16:18:52 -0400 Subject: [PATCH 346/736] move AlphaMissense higher in predictor list --- ui/shared/utils/constants.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 6778431a7c..74943e903d 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1470,6 +1470,7 @@ const REVERSE_PRED_COLOR_MAP = [...PRED_COLOR_MAP].reverse() export const ORDERED_PREDICTOR_FIELDS = [ { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.151, 22.8, 25.3, 28.1, undefined], min: 1, max: 99, 
fieldTitle: 'CADD', requiresCitation: true }, { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, thresholds: [0.0161, 0.291, 0.644, 0.773, 0.932], fieldTitle: 'REVEL', requiresCitation: true }, + { field: 'alphamissense', fieldTitle: 'AlphaMissense', displayOnly: true }, { field: 'vest', thresholds: [undefined, 0.45, 0.764, 0.861, 0.965], fieldTitle: 'VEST', requiresCitation: true }, { field: 'mut_pred', thresholds: [0.0101, 0.392, 0.737, 0.829, 0.932], fieldTitle: 'MutPred', requiresCitation: true }, { field: 'mpc', group: MISSENSE_IN_SILICO_GROUP, thresholds: [undefined, undefined, 1.36, 1.828, undefined], max: 5, fieldTitle: 'MPC' }, @@ -1500,7 +1501,6 @@ export const ORDERED_PREDICTOR_FIELDS = [ thresholds: [undefined, undefined, 2.18, 4, undefined], requiresCitation: true, }, - { field: 'alphamissense', fieldTitle: 'AlphaMissense', displayOnly: true }, { field: 'haplogroup_defining', indicatorMap: { Y: { color: 'green', value: '' } } }, { field: 'mitotip', indicatorMap: MITOTIP_MAP, fieldTitle: 'MitoTIP' }, { field: 'hmtvar', thresholds: [undefined, undefined, 0.35, 0.35, undefined], fieldTitle: 'HmtVar' }, From cce9e49ea1bdd3e084e1aa4a5a57d86b29c6873a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 24 Jun 2024 16:42:48 -0400 Subject: [PATCH 347/736] disable send mme contact email for external projects --- ui/pages/Project/components/Matchmaker.jsx | 10 +++++++--- ui/shared/components/buttons/UpdateButton.jsx | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ui/pages/Project/components/Matchmaker.jsx b/ui/pages/Project/components/Matchmaker.jsx index c87f17e32e..f6295b562a 100644 --- a/ui/pages/Project/components/Matchmaker.jsx +++ b/ui/pages/Project/components/Matchmaker.jsx @@ -43,6 +43,7 @@ import { getMmeDefaultContactEmail, getMatchmakerContactNotes, getVariantGeneId, + getCurrentProject, } from '../selectors' import SelectSavedVariantsTable from './SelectSavedVariantsTable' @@ -160,28 +161,31 @@ const CONTACT_FIELDS = [ { name: 
'body', component: BaseSemanticInput, inputType: 'TextArea', rows: 12 }, ] -const BaseContactHostButton = React.memo(({ defaultContactEmail, onSubmit }) => ( +const BaseContactHostButton = React.memo(({ defaultContactEmail, onSubmit, canSend }) => ( )) BaseContactHostButton.propTypes = { defaultContactEmail: PropTypes.object, onSubmit: PropTypes.func, + canSend: PropTypes.bool, } const mapContactButtonStateToProps = (state, ownProps) => ({ defaultContactEmail: getMmeDefaultContactEmail(state, ownProps), + canSend: getCurrentProject(state).isAnalystProject, }) const mapContactDispatchToProps = { diff --git a/ui/shared/components/buttons/UpdateButton.jsx b/ui/shared/components/buttons/UpdateButton.jsx index 11f9c139f9..4983b70707 100644 --- a/ui/shared/components/buttons/UpdateButton.jsx +++ b/ui/shared/components/buttons/UpdateButton.jsx @@ -8,7 +8,7 @@ import Modal from '../modal/Modal' const UpdateButton = React.memo(({ onSubmit, initialValues, formFields, modalTitle, modalId, buttonText, editIconName, size, modalSize, showErrorPanel, disabled, confirmDialog, submitButtonText, buttonFloated, trigger, formContainer =
, modalPopup, - decorators, formMetaId, + decorators, formMetaId, submitOnChange, }) => ( ), @@ -65,6 +66,7 @@ UpdateButton.propTypes = { formMetaId: PropTypes.string, trigger: PropTypes.node, decorators: PropTypes.arrayOf(PropTypes.func), + submitOnChange: PropTypes.bool, } export default UpdateButton From 1551467010f04f99f1adfde40bfb1b5f518f9b50 Mon Sep 17 00:00:00 2001 From: Yash Date: Tue, 25 Jun 2024 08:18:46 +1000 Subject: [PATCH 348/736] Update CodeQL action version (#226) --- .github/workflows/trivy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 3d98c4c15b..5e5a0e2eb3 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -50,7 +50,7 @@ jobs: output: 'trivy-results-prod.sarif' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v1 + uses: github/codeql-action/upload-sarif@v2 with: sarif_file: 'trivy-results-prod.sarif' From 50213d9d89841c77fbf8d0c6ff82270d2e348426 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 25 Jun 2024 13:33:15 -0400 Subject: [PATCH 349/736] changes --- seqr/views/apis/family_api.py | 21 +++++++++++---------- seqr/views/apis/family_api_tests.py | 12 +++--------- seqr/views/utils/orm_to_json_utils.py | 4 ++-- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 1987a849a8..4e862174fb 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -43,20 +43,21 @@ def family_page_data(request, family_guid): has_case_review_perm = has_case_review_permissions(project, request.user) sample_models = Sample.objects.filter(individual__family=family) - additional_values = { - 'rnaSeqTypes': JSONObject( - hasRnaSeqTpm=Case(When(Exists(RnaSeqTpm.objects.filter(sample_id=OuterRef('pk'))), then=True), default=False), - 
hasRnaSeqOutlier=Case(When(Exists(RnaSeqOutlier.objects.filter(sample_id=OuterRef('pk'))), then=True), default=False), - hasRnaSeqSpliceOutlier=Case(When(Exists(RnaSeqSpliceOutlier.objects.filter(sample_id=OuterRef('pk'))), then=True), default=False), - ) - } samples = get_json_for_samples( - sample_models, project_guid=project.guid, family_guid=family_guid, skip_nested=True, is_analyst=is_analyst, - additional_values=additional_values + sample_models, project_guid=project.guid, family_guid=family_guid, skip_nested=True, is_analyst=is_analyst ) response = { - 'samplesByGuid': {s['sampleGuid']: s for s in samples}, + 'samplesByGuid': {s['sampleGuid']: s for s in samples} + } + rna_type_samples = { + 'TPM': set(RnaSeqTpm.objects.filter(sample__in=sample_models).values_list('sample__guid', flat=True)), + 'Expression Outlier': set(RnaSeqOutlier.objects.filter(sample__in=sample_models).values_list('sample__guid', flat=True)), + 'Splice Outlier': set(RnaSeqSpliceOutlier.objects.filter(sample__in=sample_models).values_list('sample__guid', flat=True)), } + for sample in response['samplesByGuid'].values(): + sample['rnaSeqTypes'] = [ + rnaseq_type for rnaseq_type, sample_ids in rna_type_samples.items() if sample['sampleGuid'] in sample_ids + ] add_families_context(response, families, project.guid, request.user, is_analyst, has_case_review_perm) family_response = response['familiesByGuid'][family_guid] diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index d585018b1c..cbbcb8f842 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -92,15 +92,9 @@ def test_family_page_data(self): self.assertSetEqual({FAMILY_GUID}, {s['familyGuid'] for s in response_json['samplesByGuid'].values()}) self.assertEqual(len(individual['sampleGuids']), 3) self.assertTrue(set(individual['sampleGuids']).issubset(set(response_json['samplesByGuid'].keys()))) - expected_rna_seq_types = [ - {'hasRnaSeqTpm': False, 
'hasRnaSeqOutlier': False, 'hasRnaSeqSpliceOutlier': False} for _ in range(3) - ] + [ - {'hasRnaSeqTpm': True, 'hasRnaSeqOutlier': False, 'hasRnaSeqSpliceOutlier': True}, - {'hasRnaSeqTpm': True, 'hasRnaSeqOutlier': True, 'hasRnaSeqSpliceOutlier': True}, - {'hasRnaSeqTpm': False, 'hasRnaSeqOutlier': False, 'hasRnaSeqSpliceOutlier': True}, - ] - self.assertListEqual(expected_rna_seq_types, - [response_json['samplesByGuid'][guid]['rnaSeqTypes'] for guid in SAMPLE_GUIDS] + self.assertListEqual( + [[], [], [], ['TPM', 'Splice Outlier'], ['TPM', 'Expression Outlier', 'Splice Outlier'], ['Splice Outlier']], + [response_json['samplesByGuid'][guid].get('rnaSeqTypes', {}) for guid in SAMPLE_GUIDS] ) self.assertEqual(len(response_json['igvSamplesByGuid']), 1) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 8939c99d4b..e14ad01066 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -346,7 +346,7 @@ def _get_sample_json_kwargs(project_guid=None, family_guid=None, individual_guid return {'guid_key': 'sampleGuid', **additional_kwargs, **kwargs} -def get_json_for_samples(samples, additional_values=None, **kwargs): +def get_json_for_samples(samples, **kwargs): """Returns a JSON representation of the given list of Samples. 
Args: @@ -356,7 +356,7 @@ def get_json_for_samples(samples, additional_values=None, **kwargs): array: array of json objects """ - return get_json_for_queryset(samples, additional_values=additional_values, **_get_sample_json_kwargs(**kwargs)) + return get_json_for_queryset(samples, **_get_sample_json_kwargs(**kwargs)) def get_json_for_sample(sample, **kwargs): From 6602382908760227c1ebe636b228a7b5425119e2 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 25 Jun 2024 13:34:59 -0400 Subject: [PATCH 350/736] lint stuff --- seqr/views/apis/family_api.py | 2 +- seqr/views/utils/orm_to_json_utils.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 4e862174fb..dcae71adeb 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -5,7 +5,7 @@ from collections import defaultdict from django.contrib.auth.models import User from django.contrib.postgres.aggregates import ArrayAgg -from django.db.models import Count, Q, Case, When, Value, Exists, OuterRef +from django.db.models import Count, Q from django.db.models.fields.files import ImageFieldFile from django.db.models.functions import JSONObject, Concat, Upper, Substr diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index e14ad01066..67a943fc8f 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -351,7 +351,6 @@ def get_json_for_samples(samples, **kwargs): Args: samples (array): array of django models for the Samples. - additional_values (dict): additional values to include in the json Returns: array: array of json objects """ From a28272a935f32368f887a0eab848a37aabdd3a93 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 25 Jun 2024 13:41:18 -0400 Subject: [PATCH 351/736] key from project api tests? 
--- seqr/views/utils/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index bb0d509835..f0ed234d4f 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -793,7 +793,7 @@ def _get_list_param(call, param): SAMPLE_FIELDS = { 'projectGuid', 'familyGuid', 'individualGuid', 'sampleGuid', 'createdDate', 'sampleType', 'sampleId', 'isActive', - 'loadedDate', 'datasetType', 'elasticsearchIndex', 'rnaSeqTypes' + 'loadedDate', 'datasetType', 'elasticsearchIndex', } IGV_SAMPLE_FIELDS = { From 41ffb9b2e901b7f454a9c768e7a9933345bb99c3 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 25 Jun 2024 13:52:35 -0400 Subject: [PATCH 352/736] fix the last test --- seqr/views/apis/family_api_tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index cbbcb8f842..bd43d8d697 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -31,8 +31,11 @@ INDIVIDUAL3_GUID = 'I000003_na19679' INDIVIDUAL_GUIDS = [INDIVIDUAL_GUID, INDIVIDUAL2_GUID, INDIVIDUAL3_GUID] + +FAMILY_API_SAMPLE_FIELDS = {*SAMPLE_FIELDS, 'rnaSeqTypes'} SAMPLE_GUIDS = ['S000129_na19675', 'S000130_na19678', 'S000131_na19679', 'S000151_na19675_1', 'S000152_na19675_d2', 'S000153_na19679'] + class FamilyAPITest(AuthenticationTestCase): fixtures = ['users', '1kg_project', 'reference_data'] @@ -87,7 +90,7 @@ def test_family_page_data(self): self.assertSetEqual({FAMILY_GUID}, {i['familyGuid'] for i in response_json['individualsByGuid'].values()}) self.assertEqual(len(response_json['samplesByGuid']), 6) - self.assertSetEqual(set(next(iter(response_json['samplesByGuid'].values())).keys()), SAMPLE_FIELDS) + self.assertSetEqual(set(next(iter(response_json['samplesByGuid'].values())).keys()), FAMILY_API_SAMPLE_FIELDS) self.assertSetEqual({PROJECT_GUID}, {s['projectGuid'] for s 
in response_json['samplesByGuid'].values()}) self.assertSetEqual({FAMILY_GUID}, {s['familyGuid'] for s in response_json['samplesByGuid'].values()}) self.assertEqual(len(individual['sampleGuids']), 3) From 40038e12359ec4721bc0358b5ea4efc3477559dd Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 10:30:27 -0400 Subject: [PATCH 353/736] add comment --- ui/pages/Project/components/Matchmaker.jsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ui/pages/Project/components/Matchmaker.jsx b/ui/pages/Project/components/Matchmaker.jsx index f6295b562a..7c461c470b 100644 --- a/ui/pages/Project/components/Matchmaker.jsx +++ b/ui/pages/Project/components/Matchmaker.jsx @@ -162,7 +162,9 @@ const CONTACT_FIELDS = [ ] const BaseContactHostButton = React.memo(({ defaultContactEmail, onSubmit, canSend }) => ( + // when submitOnChange is true, no submit button is shown )) From f9ce39165d238eb0733e0be5617dfe0a4c8e9543 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 10:58:00 -0400 Subject: [PATCH 354/736] validate no ambiguity for aip families --- seqr/views/apis/summary_data_api.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 2c8663b76d..6e94e7643c 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -184,17 +184,25 @@ def _load_aip_data(data: dict, user: User): category_map = data['metadata']['categories'] results = data['results'] - family_id_map = dict(Individual.objects.filter( + family_id_map = defaultdict(list) + for individual_id, family_id in Individual.objects.filter( family__project__in=get_internal_projects(), individual_id__in=results.keys(), - ).values_list('individual_id', 'family_id')) + ).values_list('individual_id', 'family_id'): + family_id_map[individual_id].append(family_id) + errors = [] missing_individuals = set(results.keys()) - set(family_id_map.keys()) 
if missing_individuals: - raise ErrorsWarningsException([f'Unable to find the following individuals: {", ".join(sorted(missing_individuals))}']) + errors.append(f'Unable to find the following individuals: {", ".join(sorted(missing_individuals))}') + multi_family_individuals = {individual_id for individual_id, families in family_id_map.items() if len(families) > 1} + if multi_family_individuals: + errors.append(f'The following individuals are found in multiple families: {", ".join(sorted(multi_family_individuals))}') + if errors: + raise ErrorsWarningsException(errors) family_variant_data = {} for family_id, variant_pred in results.items(): family_variant_data.update({ - (family_id_map[family_id], variant_id): pred for variant_id, pred in variant_pred.items() + (family_id_map[family_id][0], variant_id): pred for variant_id, pred in variant_pred.items() }) today = datetime.now().strftime('%Y-%m-%d') From 6a3bc72342151cf2825d4fe616f665b5a6500e0b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 11:02:47 -0400 Subject: [PATCH 355/736] specify projects in the AIP metadata --- seqr/views/apis/summary_data_api.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 6e94e7643c..b279556c49 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -182,11 +182,15 @@ def bulk_update_family_external_analysis(request): def _load_aip_data(data: dict, user: User): category_map = data['metadata']['categories'] + projects = data['metadata'].get('projects') results = data['results'] + if not projects: + raise ErrorsWarningsException(['No projects specified in the metadata']) + family_id_map = defaultdict(list) for individual_id, family_id in Individual.objects.filter( - family__project__in=get_internal_projects(), individual_id__in=results.keys(), + family__project__in=get_internal_projects().filter(name__in=projects), 
individual_id__in=results.keys(), ).values_list('individual_id', 'family_id'): family_id_map[individual_id].append(family_id) errors = [] From afceec7ce37a6dbabca6b5915a4a83cb13c83920 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 11:09:00 -0400 Subject: [PATCH 356/736] update tests --- seqr/views/apis/summary_data_api_tests.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 8785e361e0..fd999b9a6f 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -505,9 +505,19 @@ def test_bulk_update_family_external_analysis(self, mock_load_uploaded_file, moc body['dataType'] = 'AIP' response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 400) + self.assertEqual(response.json()['errors'], ['No projects specified in the metadata']) + + aip_upload['metadata']['projects'] = ['1kg project nåme with uniçøde', 'Test Reprocessed Project'] + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['errors'], ['Unable to find the following individuals: SAM_123']) - aip_upload['results']['NA20889'] = aip_upload['results'].pop('SAM_123') + aip_upload['results']['NA20870'] = aip_upload['results'].pop('SAM_123') + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 400) + self.assertEqual(response.json()['errors'], ['The following individuals are found in multiple families: NA20870']) + + aip_upload['results']['NA20889'] = aip_upload['results'].pop('NA20870') response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['errors'], [ From 
05ced7c33949c24765ef37ee9903f6ca13bf56e6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 11:26:04 -0400 Subject: [PATCH 357/736] remove unused param from create body --- seqr/views/apis/analysis_group_api.py | 1 + seqr/views/apis/analysis_group_api_tests.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/analysis_group_api.py b/seqr/views/apis/analysis_group_api.py index 53ee833f45..90322da56c 100644 --- a/seqr/views/apis/analysis_group_api.py +++ b/seqr/views/apis/analysis_group_api.py @@ -55,6 +55,7 @@ def update_analysis_group_handler(request, project_guid, analysis_group_guid=Non valid_families = set() def _validate_families(request_json): + request_json.pop('uploadedFamilyIds', None) family_guids = request_json.pop('familyGuids') families = Family.objects.filter(guid__in=family_guids).only('guid') if len(families) != len(family_guids): diff --git a/seqr/views/apis/analysis_group_api_tests.py b/seqr/views/apis/analysis_group_api_tests.py index bcae366b90..214534c2d4 100644 --- a/seqr/views/apis/analysis_group_api_tests.py +++ b/seqr/views/apis/analysis_group_api_tests.py @@ -30,7 +30,9 @@ def test_create_update_and_delete_analysis_group(self): # send valid request to create analysis_group response = self.client.post(create_analysis_group_url, content_type='application/json', data=json.dumps({ - 'name': 'new_analysis_group', 'familyGuids': ['F000001_1', 'F000002_2'] + 'name': 'new_analysis_group', 'familyGuids': ['F000001_1', 'F000002_2'], 'uploadedFamilyIds': { + 'info': ["Uploaded 2 families"], 'parsedData': [['F000001_1'], ['F000002_2']], + }, })) self.assertEqual(response.status_code, 200) new_analysis_group_response = response.json() From a4058264224e624631390698c4d2fbda9dfe60c1 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 26 Jun 2024 13:36:43 -0400 Subject: [PATCH 358/736] address deprecation warning for url() --- seqr/urls.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 
insertions(+), 16 deletions(-) diff --git a/seqr/urls.py b/seqr/urls.py index 82e6b91763..2cdce9c593 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -8,7 +8,8 @@ from seqr.views.apis.dataset_api import add_variants_dataset_handler from settings import ENABLE_DJANGO_DEBUG_TOOLBAR, MEDIA_ROOT, API_LOGIN_REQUIRED_URL, LOGIN_URL, DEBUG, \ API_POLICY_REQUIRED_URL -from django.conf.urls import url, include +from django.conf.urls import include +from django.urls import re_path, path from django.contrib import admin from django.views.generic.base import RedirectView import django.views.static @@ -358,25 +359,26 @@ 'matchmaker/v1/metrics': external_api.mme_metrics_proxy, } -urlpatterns = [url('^status', status_view)] +urlpatterns = [path('status', status_view)] # anvil workspace anvil_workspace_url = 'workspace/(?P[^/]+)/(?P[^/]+)' -urlpatterns += [url("^%(anvil_workspace_url)s$" % locals(), anvil_workspace_page)] +urlpatterns += [re_path(r"^%(anvil_workspace_url)s$" % locals(), anvil_workspace_page)] # core react page templates -urlpatterns += [url("^%(url_endpoint)s$" % locals(), main_app) for url_endpoint in react_app_pages] -urlpatterns += [url("^%(url_endpoint)s$" % locals(), no_login_main_app) for url_endpoint in no_login_react_app_pages] +urlpatterns += [re_path(r"^%(url_endpoint)s$" % locals(), main_app) for url_endpoint in react_app_pages] +urlpatterns += [re_path(r"^%(url_endpoint)s$" % locals(), no_login_main_app) for url_endpoint in no_login_react_app_pages] # api for url_endpoint, handler_function in api_endpoints.items(): - urlpatterns.append( url("^api/%(url_endpoint)s$" % locals(), handler_function) ) + urlpatterns.append(re_path(r"^api/%(url_endpoint)s$" % locals(), handler_function)) + # login/ logout urlpatterns += [ - url('^logout$', logout_view), - url(API_LOGIN_REQUIRED_URL.lstrip('/'), login_required_error), - url(API_POLICY_REQUIRED_URL.lstrip('/'), policies_required_error), + path('logout', logout_view), + path(API_LOGIN_REQUIRED_URL.lstrip('/'), 
login_required_error), + path(API_POLICY_REQUIRED_URL.lstrip('/'), policies_required_error), ] handler401 = 'seqr.views.apis.auth_api.app_login_required_error' @@ -389,12 +391,12 @@ ])) urlpatterns += [ - url(kibana_urls, proxy_to_kibana, name='proxy_to_kibana'), + re_path(kibana_urls, proxy_to_kibana, name='proxy_to_kibana'), ] urlpatterns += [ - url(r'^admin/login/$', RedirectView.as_view(url=LOGIN_URL, permanent=True, query_string=True)), - url(r'^admin/', admin.site.urls), + re_path(r'^admin/login/$', RedirectView.as_view(url=LOGIN_URL, permanent=True, query_string=True)), + re_path(r'^admin/', admin.site.urls), ] # The /media urlpattern is not needed if we are storing static media in a GCS bucket, @@ -402,23 +404,23 @@ # instead, set MEDIA_ROOT in settings.py to that local path, and then this urlpattern will be enabled. if MEDIA_ROOT: urlpatterns += [ - url(r'^media/(?P.*)$', django.views.static.serve, { + re_path(r'^media/(?P.*)$', django.views.static.serve, { 'document_root': MEDIA_ROOT, }), ] urlpatterns += [ - url('', include('social_django.urls')), + path('', include('social_django.urls')), ] if DEBUG: urlpatterns += [ - url(r'^hijack/', include('hijack.urls')), + re_path(r'^hijack/', include('hijack.urls')), ] # django debug toolbar if ENABLE_DJANGO_DEBUG_TOOLBAR: import debug_toolbar urlpatterns = [ - url(r'^__debug__/', include(debug_toolbar.urls)), + re_path(r'^__debug__/', include(debug_toolbar.urls)), ] + urlpatterns From 00db10795ff32e33779abd7ec2f283a01f2c9ade Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 16:12:24 -0400 Subject: [PATCH 359/736] debug code --- hail_search/queries/base.py | 13 ++++++++----- hail_search/queries/mito.py | 2 +- hail_search/web_app.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index fb8565dcde..bd3173580f 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -308,11 +308,14 @@ def 
_load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): - project_ht = self._read_table( - f'projects/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing else None, - ) + try: + project_ht = self._read_table( + f'projects/{project_guid}.ht', + use_ssd_dir=True, + skip_missing_field='family_entries' if skip_all_missing else None, + ) + except Exception: + project_ht = None if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index f59811ee54..a512b22a69 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -309,7 +309,7 @@ def _gene_rank_sort(cls, r, gene_ranks): def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): # Get all the project-families for the looked up variant formatted as a dict of dicts: # {: {: True, : True}, : ...} - lookup_ht = self._read_table('lookup.ht', use_ssd_dir=True, skip_missing_field='project_stats') + lookup_ht = self._read_table('single_variant.ht', use_ssd_dir=True, skip_missing_field='project_stats') if lookup_ht is None: raise HTTPNotFound() variant_projects = lookup_ht.aggregate(hl.agg.take( diff --git a/hail_search/web_app.py b/hail_search/web_app.py index 83efad67af..01159f6600 100644 --- a/hail_search/web_app.py +++ b/hail_search/web_app.py @@ -110,7 +110,7 @@ async def init_web_app(): spark_conf['spark.driver.memory'] = f'{int((int(MACHINE_MEM)-11)*JVM_MEMORY_FRACTION)}g' if JAVA_OPTS_XSS: spark_conf.update({f'spark.{field}.extraJavaOptions': f'-Xss{JAVA_OPTS_XSS}' for field in ['driver', 'executor']}) - hl.init(idempotent=True, spark_conf=spark_conf or None) + hl.init(idempotent=True, spark_conf=spark_conf or None, backend='local') hl._set_flags(use_new_shuffle='1') 
load_globals() app = web.Application(middlewares=[error_middleware], client_max_size=(1024**2)*10) From 120bb26129a58a133260f5f56d2f76068d95c8e5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 16:17:21 -0400 Subject: [PATCH 360/736] remove missing entries --- hail_search/queries/mito.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index a512b22a69..6457733314 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -331,7 +331,7 @@ def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): annotation_fields.update({ 'familyGenotypes': lambda r: hl.dict(r.family_entries.map( - lambda entries: (entries.first().familyGuid, entries.map(self._get_sample_genotype)) + lambda entries: (entries.first().familyGuid, entries.filter(hl.is_defined).map(self._get_sample_genotype)) )), }) From 62fb32252b722a73bb8e6b4b8b813cf8200cf48c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 16:17:39 -0400 Subject: [PATCH 361/736] Revert "debug code" This reverts commit 00db10795ff32e33779abd7ec2f283a01f2c9ade. 
--- hail_search/queries/base.py | 13 +++++-------- hail_search/queries/mito.py | 2 +- hail_search/web_app.py | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index bd3173580f..fb8565dcde 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -308,14 +308,11 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): - try: - project_ht = self._read_table( - f'projects/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing else None, - ) - except Exception: - project_ht = None + project_ht = self._read_table( + f'projects/{project_guid}.ht', + use_ssd_dir=True, + skip_missing_field='family_entries' if skip_all_missing else None, + ) if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 6457733314..17f5d65c57 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -309,7 +309,7 @@ def _gene_rank_sort(cls, r, gene_ranks): def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): # Get all the project-families for the looked up variant formatted as a dict of dicts: # {: {: True, : True}, : ...} - lookup_ht = self._read_table('single_variant.ht', use_ssd_dir=True, skip_missing_field='project_stats') + lookup_ht = self._read_table('lookup.ht', use_ssd_dir=True, skip_missing_field='project_stats') if lookup_ht is None: raise HTTPNotFound() variant_projects = lookup_ht.aggregate(hl.agg.take( diff --git a/hail_search/web_app.py b/hail_search/web_app.py index 01159f6600..83efad67af 100644 --- a/hail_search/web_app.py +++ b/hail_search/web_app.py @@ -110,7 +110,7 @@ async def init_web_app(): spark_conf['spark.driver.memory'] 
= f'{int((int(MACHINE_MEM)-11)*JVM_MEMORY_FRACTION)}g' if JAVA_OPTS_XSS: spark_conf.update({f'spark.{field}.extraJavaOptions': f'-Xss{JAVA_OPTS_XSS}' for field in ['driver', 'executor']}) - hl.init(idempotent=True, spark_conf=spark_conf or None, backend='local') + hl.init(idempotent=True, spark_conf=spark_conf or None) hl._set_flags(use_new_shuffle='1') load_globals() app = web.Application(middlewares=[error_middleware], client_max_size=(1024**2)*10) From 6a6c14fd74772ec5ac80d439c5d951b9d87ab280 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 16:33:50 -0400 Subject: [PATCH 362/736] update summary data validation tags --- ui/pages/SummaryData/components/SavedVariants.jsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ui/pages/SummaryData/components/SavedVariants.jsx b/ui/pages/SummaryData/components/SavedVariants.jsx index 0e2742a731..64e4b15caa 100644 --- a/ui/pages/SummaryData/components/SavedVariants.jsx +++ b/ui/pages/SummaryData/components/SavedVariants.jsx @@ -45,9 +45,9 @@ const TAG_OPTIONS = [ 'Tier 2 - Known gene, new phenotype', KNOWN_GENE_FOR_PHENOTYPE_TAG_NAME, REVIEW_TAG_NAME, - 'Send for Sanger validation', - 'Sanger validated', - 'Sanger did not confirm', + 'Send for validation', + 'Validated', + 'Validation did not confirm', 'Confident AR one hit', 'Analyst high priority', 'AIP', From dbdb0d93d32afdb90d72d1886a021b0b9784dd1f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 17:01:51 -0400 Subject: [PATCH 363/736] document extra response keys --- seqr/views/apis/report_api_tests.py | 16 ++++++++-------- seqr/views/apis/summary_data_api_tests.py | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 00ade3156d..222182fd1d 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -496,12 +496,12 @@ 'MME': False, 'additional_family_members_with_variant': '', 
'allele_balance_or_heteroplasmy_percentage': None, - 'analysisStatus': 'Q', - 'analysis_groups': '', + 'analysisStatus': 'Q', # unused + 'analysis_groups': '', # unused 'clinvar': None, 'condition_id': None, - 'consanguinity': 'Unknown', - 'end': None, + 'consanguinity': 'Unknown', # unused + 'end': None, # unused 'hgvsc': '', 'hgvsp': '', 'method_of_discovery': 'SR-ES', @@ -509,7 +509,7 @@ 'phenotype_contribution': 'Full', 'partial_contribution_explained': '', 'seqr_chosen_consequence': None, - 'svName': None, + 'svName': None, # unused 'svType': None, 'sv_name': None, 'transcript': None, @@ -1178,7 +1178,7 @@ def test_family_metadata(self): 'solve_status': 'Partially solved', 'actual_inheritance': 'unknown', 'condition_id': 'OMIM:616126', - 'condition_inheritance': 'Autosomal recessive', + 'condition_inheritance': 'Autosomal recessive', # unused 'known_condition_name': 'Immunodeficiency 38', 'date_data_generation': '2017-02-05', 'data_type': 'WES', @@ -1266,7 +1266,7 @@ def test_variant_metadata(self): 'familyGuid': 'F000002_2', 'family_id': '2', 'gene_of_interest': 'RP11', - 'gene_id': 'ENSG00000135953', + 'gene_id': 'ENSG00000135953', # unused 'gene_known_for_phenotype': 'Known', 'genetic_findings_id': 'HG00731_1_248367227', 'known_condition_name': 'mitochondrial disease', @@ -1331,7 +1331,7 @@ def test_variant_metadata(self): 'displayName': '12', 'familyGuid': 'F000012_12', 'family_id': '12', - 'family_history': 'Yes', + 'family_history': 'Yes', # unused 'gene_of_interest': 'OR4G11P', 'gene_id': 'ENSG00000240361', 'gene_known_for_phenotype': 'Candidate', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 8785e361e0..b5acdf21cc 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -36,7 +36,7 @@ "projectGuid": "R0003_test", "num_saved_variants": 2, "solve_status": "Partially solved", - "sample_id": "NA20889", + "sample_id": "NA20889", # unused
"gene_known_for_phenotype-1": "Candidate", "gene_known_for_phenotype-2": "Candidate", "variant_inheritance-1": "unknown", @@ -52,7 +52,7 @@ "sv_name-2": "DEL:chr1:249045487-249045898", "chrom-2": "1", "pos-2": 249045487, - 'end-2': 249045898, + 'end-2': 249045898, # unused "maternal_id": "", "paternal_id": "", "maternal_guid": "", @@ -71,7 +71,7 @@ "chrom-1": "1", "alt-1": "T", "gene_of_interest-1": "OR4G11P", - "gene_id-1": "ENSG00000240361", + "gene_id-1": "ENSG00000240361", # unused 'variant_reference_assembly-1': 'GRCh37', 'variant_reference_assembly-2': 'GRCh37', "pmid_id": None, @@ -96,16 +96,16 @@ 'seqr_chosen_consequence-2': None, 'gene_of_interest-2': None, 'gene_id-2': None, - 'svName-2': None, + 'svName-2': None, # unused 'svType-1': None, 'sv_name-1': None, 'svName-1': None, 'end-1': None, - 'allele_balance_or_heteroplasmy_percentage-1': None, + 'allele_balance_or_heteroplasmy_percentage-1': None, # unused 'allele_balance_or_heteroplasmy_percentage-2': None, 'notes-1': None, 'notes-2': None, - 'tags-1': ['Tier 1 - Novel gene and phenotype'], + 'tags-1': ['Tier 1 - Novel gene and phenotype'], # unused 'tags-2': ['Tier 1 - Novel gene and phenotype'], 'phenotype_contribution-1': 'Partial', 'phenotype_contribution-2': 'Full', From 360ed6182f7476fe4f5ace394583cceaf384af32 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 26 Jun 2024 17:21:03 -0400 Subject: [PATCH 364/736] share metadata columns for individual and family metadata --- seqr/views/apis/report_api_tests.py | 2 ++ ui/pages/Report/components/FamilyMetadata.jsx | 17 ++--------------- .../components/IndividualMetadata.jsx | 17 ++--------------- ui/shared/components/table/LoadReportTable.jsx | 2 +- ui/shared/utils/constants.js | 17 +++++++++++++++++ 5 files changed, 24 insertions(+), 31 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 222182fd1d..6ff66cbaf4 100644 --- a/seqr/views/apis/report_api_tests.py +++
b/seqr/views/apis/report_api_tests.py @@ -513,6 +513,7 @@ 'svType': None, 'sv_name': None, 'transcript': None, + # TODO missing internal_project_id } PARTICIPANT_TABLE = [ @@ -1195,6 +1196,7 @@ def test_family_metadata(self): 'analysisStatus': 'Q', 'analysis_groups': '', 'consanguinity': 'Unknown', + # TODO missing internal_project_id }) # Test all projects diff --git a/ui/pages/Report/components/FamilyMetadata.jsx b/ui/pages/Report/components/FamilyMetadata.jsx index 412dc55848..93bfde2892 100644 --- a/ui/pages/Report/components/FamilyMetadata.jsx +++ b/ui/pages/Report/components/FamilyMetadata.jsx @@ -1,33 +1,20 @@ import React from 'react' import LoadReportTable from 'shared/components/table/LoadReportTable' -import { FAMILY_ANALYSIS_STATUS_LOOKUP } from 'shared/utils/constants' +import { BASE_FAMILY_METADATA_COLUMNS } from 'shared/utils/constants' const VIEW_ALL_PAGES = [{ name: 'Broad', downloadName: 'All', path: 'all' }] const COLUMNS = [ - { name: 'data_type' }, - { name: 'date_data_generation', format: ({ date_data_generation: date }) => date && new Date(date).toLocaleDateString() }, - { name: 'phenotype_description' }, - { name: 'consanguinity' }, - { - name: 'analysisStatus', - content: 'analysis_status', - format: ({ analysisStatus }) => FAMILY_ANALYSIS_STATUS_LOOKUP[analysisStatus]?.name, - }, - { name: 'solve_status' }, + ...BASE_FAMILY_METADATA_COLUMNS.map(({ secondaryExportColumn, ...col }) => col), { name: 'genes' }, { name: 'actual_inheritance' }, - { name: 'condition_id' }, - { name: 'known_condition_name' }, { name: 'individual_count', content: '# individuals' }, { name: 'family_structure' }, { name: 'proband_id' }, { name: 'paternal_id' }, { name: 'maternal_id' }, { name: 'other_individual_ids' }, - { name: 'analysis_groups' }, - { name: 'pmid_id' }, ] const FamilyMetadata = props => ( diff --git a/ui/pages/SummaryData/components/IndividualMetadata.jsx b/ui/pages/SummaryData/components/IndividualMetadata.jsx index 00bd8c31e4..6da7ac13c9 100644 
--- a/ui/pages/SummaryData/components/IndividualMetadata.jsx +++ b/ui/pages/SummaryData/components/IndividualMetadata.jsx @@ -3,7 +3,7 @@ import { connect } from 'react-redux' import { getUser } from 'redux/selectors' import { BaseSemanticInput, BooleanCheckbox } from 'shared/components/form/Inputs' import LoadReportTable from 'shared/components/table/LoadReportTable' -import { FAMILY_ANALYSIS_STATUS_LOOKUP, VARIANT_METADATA_COLUMNS } from 'shared/utils/constants' +import { VARIANT_METADATA_COLUMNS, BASE_FAMILY_METADATA_COLUMNS } from 'shared/utils/constants' const ALL_PROJECTS_PATH = 'all' const GREGOR_PROJECT_PATH = 'gregor' @@ -31,29 +31,16 @@ const AIRTABLE_FIELDS = [ const CORE_COLUMNS = [ { name: 'participant_id', secondaryExportColumn: 'individual_guid' }, - { name: 'pmid_id' }, { name: 'paternal_id', secondaryExportColumn: 'paternal_guid' }, { name: 'maternal_id', secondaryExportColumn: 'maternal_guid' }, { name: 'proband_relationship' }, { name: 'sex' }, { name: 'ancestry' }, - { name: 'condition_id' }, - { name: 'known_condition_name', secondaryExportColumn: 'disorders' }, { name: 'affected_status' }, { name: 'hpo_present', style: { minWidth: '400px' } }, { name: 'hpo_absent', style: { minWidth: '400px' } }, - { name: 'phenotype_description', style: { minWidth: '200px' } }, - { name: 'analysis_groups' }, - { - name: 'analysisStatus', - content: 'analysis_status', - format: ({ analysisStatus }) => FAMILY_ANALYSIS_STATUS_LOOKUP[analysisStatus]?.name, - }, - { name: 'solve_status' }, { name: 'MME' }, - { name: 'data_type' }, - { name: 'date_data_generation', secondaryExportColumn: 'filter_flags' }, - { name: 'consanguinity' }, + ...BASE_FAMILY_METADATA_COLUMNS, { name: 'family_history' }, ] diff --git a/ui/shared/components/table/LoadReportTable.jsx b/ui/shared/components/table/LoadReportTable.jsx index 4a9ab52d41..923cfbe542 100644 --- a/ui/shared/components/table/LoadReportTable.jsx +++ b/ui/shared/components/table/LoadReportTable.jsx @@ -54,7 +54,7 @@ 
const ReportTable = React.memo(( striped collapsing horizontalScroll - downloadFileName={`${viewAllPages.find(({ path }) => path === projectGuid)?.downloadName || (data?.length && data[0][PROJECT_ID_FIELD].replace(/ /g, '_'))}_${new Date().toISOString().slice(0, 10)}_${fileName}`} + downloadFileName={`${viewAllPages.find(({ path }) => path === projectGuid)?.downloadName || (data?.length && (data[0][PROJECT_ID_FIELD] || '').replace(/ /g, '_'))}_${new Date().toISOString().slice(0, 10)}_${fileName}`} idField={idField} defaultSortColumn="family_id" emptyContent={projectGuid ? '0 cases found' : 'Select a project to view data'} diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 882b788c1f..f34ed287a8 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1876,6 +1876,23 @@ export const VARIANT_METADATA_COLUMNS = [ { name: 'ClinGen_allele_ID' }, ] +export const BASE_FAMILY_METADATA_COLUMNS = [ + { name: 'pmid_id' }, + { name: 'condition_id' }, + { name: 'known_condition_name', secondaryExportColumn: 'disorders' }, + { name: 'phenotype_description', style: { minWidth: '200px' } }, + { name: 'analysis_groups' }, + { + name: 'analysisStatus', + content: 'analysis_status', + format: ({ analysisStatus }) => FAMILY_ANALYSIS_STATUS_LOOKUP[analysisStatus]?.name, + }, + { name: 'solve_status' }, + { name: 'data_type' }, + { name: 'date_data_generation', secondaryExportColumn: 'filter_flags' }, + { name: 'consanguinity' }, +] + // RNAseq sample tissue type mapping export const TISSUE_DISPLAY = { WB: 'Whole Blood', From c9fe24a414f601656d07b28e950310c7ac73da0a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 09:50:20 -0400 Subject: [PATCH 365/736] additional unused --- seqr/views/apis/summary_data_api_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index b5acdf21cc..2b71029865 100644 --- 
a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -112,7 +112,7 @@ 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', 'partial_contribution_explained-2': '', 'condition_id': 'OMIM:616126', - 'condition_inheritance': 'Autosomal recessive', + 'condition_inheritance': 'Autosomal recessive', # unused 'known_condition_name': 'Immunodeficiency 38', 'ClinGen_allele_ID-1': 'CA1501729', 'ClinGen_allele_ID-2': None, From 77cb64c8ff8576300fc0355b1bd0e3434e93b7cd Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 09:53:28 -0400 Subject: [PATCH 366/736] add condition_inheritance to metadata --- seqr/views/apis/report_api_tests.py | 2 +- seqr/views/apis/summary_data_api_tests.py | 2 +- ui/shared/utils/constants.js | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 6ff66cbaf4..5b41b96c1d 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1179,7 +1179,7 @@ def test_family_metadata(self): 'solve_status': 'Partially solved', 'actual_inheritance': 'unknown', 'condition_id': 'OMIM:616126', - 'condition_inheritance': 'Autosomal recessive', # unused + 'condition_inheritance': 'Autosomal recessive', 'known_condition_name': 'Immunodeficiency 38', 'date_data_generation': '2017-02-05', 'data_type': 'WES', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 2b71029865..b5acdf21cc 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -112,7 +112,7 @@ 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', 'partial_contribution_explained-2': '', 'condition_id': 'OMIM:616126', - 'condition_inheritance': 'Autosomal recessive', # unused + 'condition_inheritance': 'Autosomal recessive', 'known_condition_name': 'Immunodeficiency 38', 'ClinGen_allele_ID-1': 'CA1501729', 
'ClinGen_allele_ID-2': None, diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index f34ed287a8..e4f6838430 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1879,7 +1879,8 @@ export const VARIANT_METADATA_COLUMNS = [ export const BASE_FAMILY_METADATA_COLUMNS = [ { name: 'pmid_id' }, { name: 'condition_id' }, - { name: 'known_condition_name', secondaryExportColumn: 'disorders' }, + { name: 'known_condition_name' }, + { name: 'condition_inheritance', secondaryExportColumn: 'disorders' }, { name: 'phenotype_description', style: { minWidth: '200px' } }, { name: 'analysis_groups' }, { From b2fc617a682745bbea97a1ca7b58b3e3499d74ec Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 10:01:54 -0400 Subject: [PATCH 367/736] remove unused sample_id --- seqr/views/apis/summary_data_api.py | 27 ++++++++++++----------- seqr/views/apis/summary_data_api_tests.py | 2 -- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 2c8663b76d..8e557c4a4a 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -289,24 +289,25 @@ def _add_row(row, family_id, row_type): parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items()} parsed_row['num_saved_variants'] = len(row) rows_by_subject_family_id[(participant_id, family_id)].update(parsed_row) - else: + elif row_type == SUBJECT_ROW_TYPE: row_key = (row['participant_id'], family_id) collaborator = row.pop('Collaborator', None) if collaborator: collaborator_map[row_key] = collaborator - if row_type == SUBJECT_ROW_TYPE: - race = row.pop('reported_race') - ancestry_detail = row.pop('ancestry_detail') - ethnicity = row.pop('reported_ethnicity') - row['ancestry'] = ethnicity or ancestry_detail or race - if 'features' in row: - row.update({ - 'hpo_present': [feature['id'] for feature in row.pop('features') or []], - 'hpo_absent': 
[feature['id'] for feature in row.pop('absent_features') or []], - }) - all_features.update(row['hpo_present']) - all_features.update(row['hpo_absent']) + race = row.pop('reported_race') + ancestry_detail = row.pop('ancestry_detail') + ethnicity = row.pop('reported_ethnicity') + row['ancestry'] = ethnicity or ancestry_detail or race + row.update({ + 'hpo_present': [feature['id'] for feature in row.pop('features') or []], + 'hpo_absent': [feature['id'] for feature in row.pop('absent_features') or []], + }) + all_features.update(row['hpo_present']) + all_features.update(row['hpo_absent']) rows_by_subject_family_id[row_key].update(row) + else: + row.pop('sample_id') + rows_by_subject_family_id[(row['participant_id'], family_id)].update(row) parse_anvil_metadata( projects, request.user, _add_row, max_loaded_date=request.GET.get('loadedBefore'), diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index b5acdf21cc..2044fc3019 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -36,7 +36,6 @@ "projectGuid": "R0003_test", "num_saved_variants": 2, "solve_status": "Partially solved", - "sample_id": "NA20889", # unused "gene_known_for_phenotype-1": "Candidate", "gene_known_for_phenotype-2": "Candidate", "variant_inheritance-1": "unknown", @@ -126,7 +125,6 @@ EXPECTED_SAMPLE_METADATA_ROW.update(EXPECTED_NO_AIRTABLE_SAMPLE_METADATA_ROW) EXPECTED_NO_GENE_SAMPLE_METADATA_ROW = { 'participant_id': 'NA21234', - 'sample_id': 'NA21234', 'familyGuid': 'F000014_14', 'family_id': '14', 'displayName': '14', From 178584da450e4266244e703dda1e9ddcdcc81fed Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 10:16:32 -0400 Subject: [PATCH 368/736] add back internal_project_id --- seqr/views/apis/report_api.py | 5 +++-- seqr/views/apis/report_api_tests.py | 7 +++++-- ui/shared/components/table/LoadReportTable.jsx | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git 
a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index a00e965e1b..f13626681c 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -14,7 +14,7 @@ from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, \ FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE, PARTICIPANT_TABLE, PHENOTYPE_TABLE, \ - EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, GENE_COLUMN + EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, GENE_COLUMN, FAMILY_INDIVIDUAL_FIELDS from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.permissions_utils import analyst_required, get_project_and_check_permissions, \ @@ -864,7 +864,7 @@ def _add_row(row, family_id, row_type): individuals_ids -= set(known_ids.values()) individual = proband or next(iter(individuals_by_id.values()), None) if individual: - f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']}) + f.update({k: individual[k] for k in FAMILY_INDIVIDUAL_FIELDS}) sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) @@ -926,6 +926,7 @@ def _add_row(row, family_id, row_type): families_by_id[family_id] = row elif row_type == SUBJECT_ROW_TYPE: participant_mme[row['participant_id']] = row.get('MME', {}) + families_by_id[family_id]['internal_project_id'] = row['internal_project_id'] elif row_type == DISCOVERY_ROW_TYPE: family = families_by_id[family_id] for variant in row: diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 5b41b96c1d..84b1d6e3f1 100644 --- a/seqr/views/apis/report_api_tests.py +++ 
b/seqr/views/apis/report_api_tests.py @@ -492,6 +492,7 @@ } BASE_VARIANT_METADATA_ROW = { + 'internal_project_id': '1kg project nåme with uniçøde', 'ClinGen_allele_ID': None, 'MME': False, 'additional_family_members_with_variant': '', @@ -513,7 +514,6 @@ 'svType': None, 'sv_name': None, 'transcript': None, - # TODO missing internal_project_id } PARTICIPANT_TABLE = [ @@ -1173,6 +1173,7 @@ def test_family_metadata(self): test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000012_12') self.assertDictEqual(test_row, { 'projectGuid': 'R0003_test', + 'internal_project_id': 'Test Reprocessed Project', 'familyGuid': 'F000012_12', 'family_id': '12', 'displayName': '12', @@ -1196,7 +1197,6 @@ def test_family_metadata(self): 'analysisStatus': 'Q', 'analysis_groups': '', 'consanguinity': 'Unknown', - # TODO missing internal_project_id }) # Test all projects @@ -1212,6 +1212,7 @@ def test_family_metadata(self): test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3') self.assertDictEqual(test_row, { 'projectGuid': 'R0001_1kg', + 'internal_project_id': '1kg project nåme with uniçøde', 'familyGuid': 'F000003_3', 'family_id': '3', 'displayName': '3', @@ -1345,6 +1346,7 @@ def test_variant_metadata(self): 'partial_contribution_explained': 'HP:0000501|HP:0000365', 'phenotype_contribution': 'Partial', 'projectGuid': 'R0003_test', + 'internal_project_id': 'Test Reprocessed Project', 'ref': 'TC', 'seqr_chosen_consequence': 'intron_variant', 'tags': ['Tier 1 - Novel gene and phenotype'], @@ -1372,6 +1374,7 @@ def test_variant_metadata(self): 'participant_id': 'NA20889', 'pos': 249045487, 'projectGuid': 'R0003_test', + 'internal_project_id': 'Test Reprocessed Project', 'ref': None, 'svType': 'DEL', 'sv_name': 'DEL:chr1:249045487-249045898', diff --git a/ui/shared/components/table/LoadReportTable.jsx b/ui/shared/components/table/LoadReportTable.jsx index 923cfbe542..4a9ab52d41 100644 --- a/ui/shared/components/table/LoadReportTable.jsx +++ 
b/ui/shared/components/table/LoadReportTable.jsx @@ -54,7 +54,7 @@ const ReportTable = React.memo(( striped collapsing horizontalScroll - downloadFileName={`${viewAllPages.find(({ path }) => path === projectGuid)?.downloadName || (data?.length && (data[0][PROJECT_ID_FIELD] || '').replace(/ /g, '_'))}_${new Date().toISOString().slice(0, 10)}_${fileName}`} + downloadFileName={`${viewAllPages.find(({ path }) => path === projectGuid)?.downloadName || (data?.length && data[0][PROJECT_ID_FIELD].replace(/ /g, '_'))}_${new Date().toISOString().slice(0, 10)}_${fileName}`} idField={idField} defaultSortColumn="family_id" emptyContent={projectGuid ? '0 cases found' : 'Select a project to view data'} From 27ae1c45a4bcabca0a80aa6fc4428c79b552c05b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 10:27:00 -0400 Subject: [PATCH 369/736] use end in variant metadata reports --- seqr/views/apis/report_api_tests.py | 2 +- seqr/views/apis/summary_data_api_tests.py | 2 +- ui/shared/utils/constants.js | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 84b1d6e3f1..6682340912 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -502,7 +502,7 @@ 'clinvar': None, 'condition_id': None, 'consanguinity': 'Unknown', # unused - 'end': None, # unused + 'end': None, 'hgvsc': '', 'hgvsp': '', 'method_of_discovery': 'SR-ES', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 2044fc3019..30dcfc54a0 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -51,7 +51,7 @@ "sv_name-2": "DEL:chr1:249045487-249045898", "chrom-2": "1", "pos-2": 249045487, - 'end-2': 249045898, # unused + 'end-2': 249045898, "maternal_id": "", "paternal_id": "", "maternal_guid": "", diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 
e4f6838430..5ddcac1e36 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1858,6 +1858,7 @@ export const VARIANT_METADATA_COLUMNS = [ { name: 'variant_reference_assembly' }, { name: 'chrom' }, { name: 'pos' }, + { name: 'end' }, { name: 'ref' }, { name: 'alt' }, { name: 'gene_of_interest' }, From aff110714ce5fb351edd64a2f2bf3a7c771228e1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 10:30:56 -0400 Subject: [PATCH 370/736] remove unused svName from metadata --- seqr/views/apis/report_api_tests.py | 1 - seqr/views/apis/summary_data_api_tests.py | 5 +---- seqr/views/utils/anvil_metadata_utils.py | 3 ++- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 6682340912..e24bd3ba1d 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -510,7 +510,6 @@ 'phenotype_contribution': 'Full', 'partial_contribution_explained': '', 'seqr_chosen_consequence': None, - 'svName': None, # unused 'svType': None, 'sv_name': None, 'transcript': None, diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 30dcfc54a0..7f32e2443d 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -70,7 +70,7 @@ "chrom-1": "1", "alt-1": "T", "gene_of_interest-1": "OR4G11P", - "gene_id-1": "ENSG00000240361", # unused + "gene_id-1": "ENSG00000240361", 'variant_reference_assembly-1': 'GRCh37', 'variant_reference_assembly-2': 'GRCh37', "pmid_id": None, @@ -95,10 +95,8 @@ 'seqr_chosen_consequence-2': None, 'gene_of_interest-2': None, 'gene_id-2': None, - 'svName-2': None, # unused 'svType-1': None, 'sv_name-1': None, - 'svName-1': None, 'end-1': None, 'allele_balance_or_heteroplasmy_percentage-1': None, # unused 'allele_balance_or_heteroplasmy_percentage-2': None, @@ -171,7 +169,6 @@ 'hgvsp-1': '', 'notes-1': None, 
'seqr_chosen_consequence-1': None, - 'svName-1': None, 'svType-1': None, 'sv_name-1': None, 'transcript-1': None, diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 4c1f58cad7..ba5f61def3 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -271,8 +271,9 @@ def _get_nested_variant_name(v): def _get_sv_name(variant_json): + sv_name = variant_json.pop('svName', None) if variant_json.get('svType'): - return variant_json.get('svName') or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json) + return sv_name or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json) return None From cae8a8cfc74807b56db62eb90a77e1e4b6d5e021 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 10:33:49 -0400 Subject: [PATCH 371/736] cleanup --- seqr/views/utils/anvil_metadata_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index ba5f61def3..5011e2eb44 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -267,11 +267,11 @@ def parse_anvil_metadata( def _get_nested_variant_name(v): - return _get_sv_name(v) or f"{v['chrom']}-{v['pos']}-{v['ref']}-{v['alt']}" + return _get_sv_name(v, pop_sv_name=False) or f"{v['chrom']}-{v['pos']}-{v['ref']}-{v['alt']}" -def _get_sv_name(variant_json): - sv_name = variant_json.pop('svName', None) +def _get_sv_name(variant_json, pop_sv_name=True): + sv_name = variant_json.pop('svName', None) if pop_sv_name else variant_json.get('svName') if variant_json.get('svType'): return sv_name or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json) return None From 3735f7988c595aee5be1b83739a1e9f05c82b33d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 11:40:44 -0400 Subject: [PATCH 372/736] note breakdown for include_metadata --- seqr/views/apis/report_api_tests.py | 4 
++-- seqr/views/utils/anvil_metadata_utils.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index e24bd3ba1d..7624509b1b 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1189,7 +1189,7 @@ def test_family_metadata(self): 'other_individual_ids': 'NA20870; NA20888', 'individual_count': 3, 'family_structure': 'other', - 'family_history': 'Yes', + 'family_history': 'Yes', # unused 'genes': 'DEL:chr1:249045487-249045898; OR4G11P', 'pmid_id': None, 'phenotype_description': None, @@ -1365,7 +1365,7 @@ def test_variant_metadata(self): 'end': 249045898, 'familyGuid': 'F000012_12', 'family_id': '12', - 'family_history': 'Yes', + 'family_history': 'Yes', # unused 'gene_of_interest': None, 'gene_id': None, 'gene_known_for_phenotype': 'Candidate', diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 5011e2eb44..0bba83abc1 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -132,14 +132,14 @@ def _get_family_metadata(family_filter, family_fields, include_metadata, include Value('\t'), Value(' '), ), analysisStatus=F('analysis_status'), - **(METADATA_FAMILY_VALUES if include_metadata else {}), + **(METADATA_FAMILY_VALUES if include_metadata else {}), # TODO analysis_groups: individual/family, rest all **{k: v['value'] for k, v in (family_fields or {}).items()} ) family_data_by_id = {} for f in family_data: family_id = f.pop('id') - analysis_status = f['analysisStatus'] if include_metadata else f.pop('analysisStatus') + analysis_status = f['analysisStatus'] if include_metadata else f.pop('analysisStatus') # TODO individual/family solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(analysis_status, Individual.UNSOLVED) f.update({ 'solve_status': Individual.SOLVE_STATUS_LOOKUP[solve_status], @@ -148,13 +148,12 @@ def 
_get_family_metadata(family_filter, family_fields, include_metadata, include if format_id: f.update({k: format_id(f[k]) for k in ['family_id', 'internal_project_id']}) if include_metadata: - f['analysis_groups'] = '; '.join(f['analysis_groups']) + f['analysis_groups'] = '; '.join(f['analysis_groups']) # TODO individual/family family_data_by_id[family_id] = f return family_data_by_id -# TODO clean up args def parse_anvil_metadata( projects: Iterable[Project], user: User, add_row: Callable[[dict, str, str], None], max_loaded_date: str = None, family_fields: dict = None, format_id: Callable[[str], str] = lambda s: s, @@ -194,7 +193,7 @@ def parse_anvil_metadata( list(sample_ids) or [i[0] for i in individual_ids_map.values()], user, airtable_fields) matchmaker_individuals = {m['individual_id']: m for m in MatchmakerSubmission.objects.filter( - individual__in=individual_samples).values('individual_id', **(mme_values or {}))} if include_metadata else {} + individual__in=individual_samples).values('individual_id', **(mme_values or {}))} if include_metadata else {} # TODO individual/variant, already dropped for family for family_id, family_subject_row in family_data_by_id.items(): saved_variants = saved_variants_by_family[family_id] @@ -207,7 +206,7 @@ def parse_anvil_metadata( affected_individuals = [ individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED - ] if include_metadata else [] + ] if include_metadata else [] # TODO individual only subject_family_row = {k: family_subject_row.pop(k) for k in FAMILY_INDIVIDUAL_FIELDS} family_row = { @@ -377,8 +376,8 @@ def _get_parsed_saved_discovery_variants_by_family( } if include_metadata: parsed_variant.update({ - 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), - 'tags': variant.tags, + 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), # TODO individual/variant, currently not in family + 'tags': variant.tags, # TODO variant only }) 
variants.append(parsed_variant) @@ -386,7 +385,7 @@ def _get_parsed_saved_discovery_variants_by_family( saved_variants_by_family = defaultdict(list) for row in variants: - gene_id = row['gene_id'] if include_metadata else row.pop('gene_id') + gene_id = row['gene_id'] if include_metadata else row.pop('gene_id') # TODO individual only, currently not in family, add to variant? row[GENE_COLUMN] = genes_by_id.get(gene_id, {}).get('geneSymbol') family_id = row.pop('family_id') saved_variants_by_family[family_id].append(row) @@ -464,6 +463,7 @@ def _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metad if has_dbgap_submission: sample_row['dbgap_sample_id'] = airtable_metadata.get('dbgap_sample_id', '') if include_metadata: + # TODO individual/family, currently not in variant sample_row.update({ 'data_type': sample.sample_type, 'date_data_generation': sample.loaded_date.strftime('%Y-%m-%d'), From eb4cb9a011cc27ffc08e85c524af6b565d29b4fe Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 11:45:59 -0400 Subject: [PATCH 373/736] only return tags in variant metadta --- seqr/views/apis/report_api.py | 1 + seqr/views/apis/summary_data_api_tests.py | 3 --- seqr/views/utils/anvil_metadata_utils.py | 8 ++++---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index f13626681c..1557d09628 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -944,6 +944,7 @@ def _add_row(row, family_id, row_type): individual_data_types={i.individual_id: i.data_types for i in individuals}, add_row=_add_row, variant_json_fields=['clinvar', 'variantId'], + variant_attr_fields=['tags'], mme_values={'variant_ids': ArrayAgg('matchmakersubmissiongenes__saved_variant__saved_variant_json__variantId')}, include_metadata=True, include_mondo=True, diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 7f32e2443d..eac67d64f8 
100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -102,8 +102,6 @@ 'allele_balance_or_heteroplasmy_percentage-2': None, 'notes-1': None, 'notes-2': None, - 'tags-1': ['Tier 1 - Novel gene and phenotype'], # unused - 'tags-2': ['Tier 1 - Novel gene and phenotype'], 'phenotype_contribution-1': 'Partial', 'phenotype_contribution-2': 'Full', 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', @@ -154,7 +152,6 @@ 'alt-1': 'T', 'chrom-1': '1', 'gene_known_for_phenotype-1': 'Candidate', - 'tags-1': ['Tier 1 - Novel gene and phenotype'], 'phenotype_contribution-1': 'Full', 'partial_contribution_explained-1': '', 'pos-1': 248367227, diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 0bba83abc1..6178157b89 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -161,7 +161,7 @@ def parse_anvil_metadata( get_additional_individual_fields: Callable[[Individual, dict], dict] = None, individual_samples: dict[Individual, Sample] = None, individual_data_types: dict[str, Iterable[str]] = None, airtable_fields: Iterable[str] = None, mme_values: dict = None, include_svs: bool = True, - variant_json_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, + variant_json_fields: Iterable[str] = None, variant_attr_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False, include_discovery_sample_id: bool = False, include_mondo: bool = False, include_parent_mnvs: bool = False, proband_only_variants: bool = False): @@ -184,7 +184,7 @@ def parse_anvil_metadata( sample_ids.add(sample.sample_id) saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family( - list(family_data_by_id.keys()), include_metadata, include_svs=include_svs, 
variant_json_fields=variant_json_fields, + list(family_data_by_id.keys()), include_metadata, include_svs, variant_json_fields, variant_attr_fields, ) condition_map = _get_condition_map(family_data_by_id.values()) @@ -329,6 +329,7 @@ def _post_process_variant_metadata(v, gene_variants, include_parent_mnvs=False): def _get_parsed_saved_discovery_variants_by_family( families: Iterable[Family], include_metadata: bool, include_svs: dict, variant_json_fields: list[str], + variant_attr_fields: list[str], ): tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY) @@ -372,12 +373,11 @@ def _get_parsed_saved_discovery_variants_by_family( **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in variant_fields + (variant_json_fields or [])}, 'ClinGen_allele_ID': variant_json.get('CAID'), - **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']}, + **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt'] + (variant_attr_fields or [])}, } if include_metadata: parsed_variant.update({ 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), # TODO individual/variant, currently not in family - 'tags': variant.tags, # TODO variant only }) variants.append(parsed_variant) From 76ccd224139c069c667171d82b110c2493c257c6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 11:57:06 -0400 Subject: [PATCH 374/736] clean up family_history computation --- seqr/views/apis/report_api_tests.py | 3 --- seqr/views/apis/summary_data_api.py | 4 ++++ seqr/views/utils/anvil_metadata_utils.py | 6 ------ 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 7624509b1b..8f65cf7dbe 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1189,7 +1189,6 @@ def test_family_metadata(self): 'other_individual_ids': 'NA20870; NA20888', 
'individual_count': 3, 'family_structure': 'other', - 'family_history': 'Yes', # unused 'genes': 'DEL:chr1:249045487-249045898; OR4G11P', 'pmid_id': None, 'phenotype_description': None, @@ -1333,7 +1332,6 @@ def test_variant_metadata(self): 'displayName': '12', 'familyGuid': 'F000012_12', 'family_id': '12', - 'family_history': 'Yes', # unused 'gene_of_interest': 'OR4G11P', 'gene_id': 'ENSG00000240361', 'gene_known_for_phenotype': 'Candidate', @@ -1365,7 +1363,6 @@ def test_variant_metadata(self): 'end': 249045898, 'familyGuid': 'F000012_12', 'family_id': '12', - 'family_history': 'Yes', # unused 'gene_of_interest': None, 'gene_id': None, 'gene_known_for_phenotype': 'Candidate', diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 8e557c4a4a..7c4bdb2f7e 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -294,6 +294,9 @@ def _add_row(row, family_id, row_type): collaborator = row.pop('Collaborator', None) if collaborator: collaborator_map[row_key] = collaborator + is_additional_affected = row.pop('is_additional_affected') + if is_additional_affected: + family_rows_by_id[family_id]['family_history'] = 'Yes' race = row.pop('reported_race') ancestry_detail = row.pop('ancestry_detail') ethnicity = row.pop('reported_ethnicity') @@ -320,6 +323,7 @@ def _add_row(row, family_id, row_type): 'filter_flags': json.dumps(individual.filter_flags) if individual.filter_flags else '', 'paternal_guid': paternal_ids[1], 'maternal_guid': maternal_ids[1], + 'is_additional_affected': individual.affected == Individual.AFFECTED_STATUS_AFFECTED and individual.proband_relationship != Individual.SELF_RELATIONSHIP, **anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission), }, ) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 6178157b89..d0e003b47b 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py 
@@ -204,10 +204,6 @@ def parse_anvil_metadata( family_subject_row, saved_variants, *condition_map, set_conditions_for_variants=proband_only_variants, ) - affected_individuals = [ - individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED - ] if include_metadata else [] # TODO individual only - subject_family_row = {k: family_subject_row.pop(k) for k in FAMILY_INDIVIDUAL_FIELDS} family_row = { 'family_id': subject_family_row['family_id'], @@ -217,8 +213,6 @@ def parse_anvil_metadata( ), 'Unknown'), **family_subject_row, } - if len(affected_individuals) > 1: - family_row['family_history'] = 'Yes' add_row(family_row, family_id, FAMILY_ROW_TYPE) for individual in family_individuals: From 4f810b0847c9a2635aa15e01b35266f58f56eb69 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 27 Jun 2024 12:01:26 -0400 Subject: [PATCH 375/736] hopefully just these changes --- requirements.in | 2 +- requirements.txt | 6 ++---- seqr/utils/middleware.py | 2 +- settings.py | 9 ++++++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/requirements.in b/requirements.in index 9ddc62799f..4a76de1a1f 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,4 @@ -Django<3.3 # core server-side framework +Django==4.2 # core server-side framework django-anymail # for sending emails using cloud-based mail service providers django-csp # for setting CSP headers django-guardian # object-level permissions for database records. 
Behind a major version due to missing Python 2 support diff --git a/requirements.txt b/requirements.txt index 8e7ef47708..83322e1a7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ defusedxml==0.7.1 # via # python3-openid # social-auth-core -django==3.2.25 +django==4.2 # via # -r requirements.in # django-anymail @@ -126,9 +126,7 @@ python-dateutil==2.8.2 python3-openid==3.2.0 # via social-auth-core pytz==2022.7.1 - # via - # django - # django-notifications-hq + # via django-notifications-hq redis==4.5.4 # via -r requirements.in requests==2.32.2 diff --git a/seqr/utils/middleware.py b/seqr/utils/middleware.py index 1ee3e53195..33d22532a3 100644 --- a/seqr/utils/middleware.py +++ b/seqr/utils/middleware.py @@ -104,7 +104,7 @@ def process_response(request, response): # conforms to the httpRequest json spec for stackdriver: https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#HttpRequest http_json = { 'requestMethod': request.method, - 'requestUrl': request.get_raw_uri(), + 'requestUrl': request.build_absolute_uri(), 'status': response.status_code, 'responseSize': len(response.content) if hasattr(response, 'content') else request.META.get('CONTENT_LENGTH'), 'userAgent': request.META.get('HTTP_USER_AGENT'), diff --git a/settings.py b/settings.py index b7fd626bfa..430c9082a1 100644 --- a/settings.py +++ b/settings.py @@ -128,10 +128,12 @@ USE_TZ = True # Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/1.10/howto/static-files/ +# https://docs.djangoproject.com/en/4.2/howto/static-files/ STATIC_URL = '/static/' -STATICFILES_DIRS = ['ui/dist'] -STATIC_ROOT = os.path.join(BASE_DIR, 'static') +STATICFILES_DIRS = [ + os.path.join(BASE_DIR, 'static'), + 'ui/dist', +] STATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', @@ -262,6 +264,7 @@ 'http://localhost:3000', 'http://localhost:8000', ) + # TODO: will docker build fail if 
STATICFILES_DIRS always contains both? # the collectstatic step in docker build runs without env variables set, and uncommenting these lines breaks the docker build # STATICFILES_DIRS.append(STATIC_ROOT) # STATIC_ROOT = None From 4be34fd04f40a35cffc1abdef3416daa0f4870a1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 12:10:36 -0400 Subject: [PATCH 376/736] clean up mme annotation --- seqr/views/apis/report_api.py | 4 ++-- seqr/views/apis/summary_data_api.py | 1 + seqr/views/utils/anvil_metadata_utils.py | 10 +++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 1557d09628..069032c125 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -931,7 +931,7 @@ def _add_row(row, family_id, row_type): family = families_by_id[family_id] for variant in row: variant_rows.append({ - 'MME': variant.pop('variantId') in participant_mme[variant['participant_id']].get('variant_ids', []), + 'MME': variant.pop('variantId') in (participant_mme[variant['participant_id']] or []), 'phenotype_contribution': 'Full', **family, **variant, @@ -945,7 +945,7 @@ def _add_row(row, family_id, row_type): add_row=_add_row, variant_json_fields=['clinvar', 'variantId'], variant_attr_fields=['tags'], - mme_values={'variant_ids': ArrayAgg('matchmakersubmissiongenes__saved_variant__saved_variant_json__variantId')}, + mme_value=ArrayAgg('matchmakersubmissiongenes__saved_variant__saved_variant_json__variantId'), include_metadata=True, include_mondo=True, omit_airtable=True, diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 7c4bdb2f7e..022480ce0c 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -316,6 +316,7 @@ def _add_row(row, family_id, row_type): projects, request.user, _add_row, max_loaded_date=request.GET.get('loadedBefore'), include_metadata=True, omit_airtable=not include_airtable, + 
mme_value=Value('Yes'), get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, maternal_ids, paternal_ids: { 'Collaborator': (airtable_metadata or {}).get('Collaborator'), 'individual_guid': individual.guid, diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index d0e003b47b..ee87e168db 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -1,6 +1,6 @@ from collections import defaultdict from datetime import datetime -from django.db.models import F, Q, Value, CharField +from django.db.models import F, Q, Value, CharField, Aggregate from django.db.models.functions import Replace from django.contrib.auth.models import User from django.contrib.postgres.aggregates import ArrayAgg @@ -160,7 +160,7 @@ def parse_anvil_metadata( get_additional_sample_fields: Callable[[Sample, dict], dict] = None, get_additional_individual_fields: Callable[[Individual, dict], dict] = None, individual_samples: dict[Individual, Sample] = None, individual_data_types: dict[str, Iterable[str]] = None, - airtable_fields: Iterable[str] = None, mme_values: dict = None, include_svs: bool = True, + airtable_fields: Iterable[str] = None, mme_value: Aggregate = None, include_svs: bool = True, variant_json_fields: Iterable[str] = None, variant_attr_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False, include_discovery_sample_id: bool = False, include_mondo: bool = False, include_parent_mnvs: bool = False, @@ -192,8 +192,8 @@ def parse_anvil_metadata( sample_airtable_metadata = None if omit_airtable else _get_sample_airtable_metadata( list(sample_ids) or [i[0] for i in individual_ids_map.values()], user, airtable_fields) - matchmaker_individuals = {m['individual_id']: m for m in MatchmakerSubmission.objects.filter( - 
individual__in=individual_samples).values('individual_id', **(mme_values or {}))} if include_metadata else {} # TODO individual/variant, already dropped for family + matchmaker_individuals = {m['individual_id']: m['value'] for m in MatchmakerSubmission.objects.filter( + individual__in=individual_samples).values('individual_id', value=mme_value)} if mme_value else {} for family_id, family_subject_row in family_data_by_id.items(): saved_variants = saved_variants_by_family[family_id] @@ -233,7 +233,7 @@ def parse_anvil_metadata( format_id, ) if individual.id in matchmaker_individuals: - subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes' + subject_row['MME'] = matchmaker_individuals[individual.id] subject_row.update(subject_family_row) if individual.solve_status: subject_row['solve_status'] = Individual.SOLVE_STATUS_LOOKUP[individual.solve_status] From f006ded12856725998fe74a1226fcd7863c7c69b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 12:25:02 -0400 Subject: [PATCH 377/736] specific family metadata fields --- seqr/views/apis/report_api.py | 4 +++- seqr/views/apis/report_api_tests.py | 2 -- seqr/views/apis/summary_data_api.py | 2 ++ seqr/views/utils/anvil_metadata_utils.py | 26 ++++++++++++------------ 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 069032c125..70d02cca9f 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -847,7 +847,8 @@ def _add_row(row, family_id, row_type): family['inheritance_models'].update({v['variant_inheritance'] for v in row}) parse_anvil_metadata( - projects, user=request.user, add_row=_add_row, omit_airtable=True, include_metadata=True, include_no_individual_families=True) + projects, user=request.user, add_row=_add_row, omit_airtable=True, include_metadata=True, + include_family_name_display=True, include_family_sample_metadata=True, include_no_individual_families=True) for 
family_id, f in families_by_id.items(): individuals_by_id = family_individuals[family_id] @@ -947,6 +948,7 @@ def _add_row(row, family_id, row_type): variant_attr_fields=['tags'], mme_value=ArrayAgg('matchmakersubmissiongenes__saved_variant__saved_variant_json__variantId'), include_metadata=True, + include_family_name_display=True, include_mondo=True, omit_airtable=True, proband_only_variants=True, diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 8f65cf7dbe..5b74a65121 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -497,8 +497,6 @@ 'MME': False, 'additional_family_members_with_variant': '', 'allele_balance_or_heteroplasmy_percentage': None, - 'analysisStatus': 'Q', # unused - 'analysis_groups': '', # unused 'clinvar': None, 'condition_id': None, 'consanguinity': 'Unknown', # unused diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 022480ce0c..c2fc6ff734 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -315,6 +315,8 @@ def _add_row(row, family_id, row_type): parse_anvil_metadata( projects, request.user, _add_row, max_loaded_date=request.GET.get('loadedBefore'), include_metadata=True, + include_family_name_display=True, + include_family_sample_metadata=True, omit_airtable=not include_airtable, mme_value=Value('Yes'), get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, maternal_ids, paternal_ids: { diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index ee87e168db..2e3851bd76 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -89,11 +89,10 @@ SAMPLE_ROW_TYPE = 'sample' DISCOVERY_ROW_TYPE = 'discovery' -METADATA_FAMILY_VALUES = { +FAMILY_NAME_DISPLAY_VALUES = { 'familyGuid': F('guid'), 'projectGuid': F('project__guid'), 'displayName': F('family_id'), - 
'analysis_groups': ArrayAgg('analysisgroup__name', distinct=True, filter=Q(analysisgroup__isnull=False)), } METHOD_MAP = { @@ -121,7 +120,11 @@ def _format_transcript_id(transcript_id, transcript): } -def _get_family_metadata(family_filter, family_fields, include_metadata, include_mondo, format_id): +def _get_family_metadata(family_filter, family_fields, include_family_name_display, include_family_sample_metadata, include_mondo, format_id): + family_fields = {'analysis_groups': { + 'value': ArrayAgg('analysisgroup__name', distinct=True, filter=Q(analysisgroup__isnull=False)), + 'format': lambda f: '; '.join(f['analysis_groups']), + }} if include_family_sample_metadata else family_fields family_data = Family.objects.filter(**family_filter).distinct().order_by('id').values( 'id', 'family_id', 'post_discovery_omim_numbers', *(['post_discovery_mondo_id'] if include_mondo else []), @@ -132,14 +135,14 @@ def _get_family_metadata(family_filter, family_fields, include_metadata, include Value('\t'), Value(' '), ), analysisStatus=F('analysis_status'), - **(METADATA_FAMILY_VALUES if include_metadata else {}), # TODO analysis_groups: individual/family, rest all + **(FAMILY_NAME_DISPLAY_VALUES if include_family_name_display else {}), **{k: v['value'] for k, v in (family_fields or {}).items()} ) family_data_by_id = {} for f in family_data: family_id = f.pop('id') - analysis_status = f['analysisStatus'] if include_metadata else f.pop('analysisStatus') # TODO individual/family + analysis_status = f['analysisStatus'] if include_family_sample_metadata else f.pop('analysisStatus') solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(analysis_status, Individual.UNSOLVED) f.update({ 'solve_status': Individual.SOLVE_STATUS_LOOKUP[solve_status], @@ -147,8 +150,6 @@ def _get_family_metadata(family_filter, family_fields, include_metadata, include }) if format_id: f.update({k: format_id(f[k]) for k in ['family_id', 'internal_project_id']}) - if include_metadata: - f['analysis_groups'] = '; 
'.join(f['analysis_groups']) # TODO individual/family family_data_by_id[family_id] = f return family_data_by_id @@ -162,7 +163,7 @@ def parse_anvil_metadata( individual_samples: dict[Individual, Sample] = None, individual_data_types: dict[str, Iterable[str]] = None, airtable_fields: Iterable[str] = None, mme_value: Aggregate = None, include_svs: bool = True, variant_json_fields: Iterable[str] = None, variant_attr_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, - include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False, + include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False, include_family_name_display: bool = False, include_family_sample_metadata: bool = False, include_discovery_sample_id: bool = False, include_mondo: bool = False, include_parent_mnvs: bool = False, proband_only_variants: bool = False): @@ -171,7 +172,7 @@ def parse_anvil_metadata( family_data_by_id = _get_family_metadata( {'project__in': projects} if include_no_individual_families else {'individual__in': individual_samples}, - family_fields, include_metadata, include_mondo, format_id + family_fields, include_family_name_display, include_family_sample_metadata, include_mondo, format_id ) individuals_by_family_id = defaultdict(list) @@ -243,7 +244,7 @@ def parse_anvil_metadata( participant_id = subject_row['participant_id'] if sample: - sample_row = _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metadata, include_metadata, get_additional_sample_fields) + sample_row = _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metadata, include_family_sample_metadata, get_additional_sample_fields) add_row(sample_row, family_id, SAMPLE_ROW_TYPE) if proband_only_variants and individual.proband_relationship != Individual.SELF_RELATIONSHIP: @@ -449,15 +450,14 @@ def anvil_export_airtable_fields(airtable_metadata, 
has_dbgap_submission): } -def _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metadata, include_metadata, get_additional_sample_fields=None): +def _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metadata, include_family_sample_metadata, get_additional_sample_fields=None): sample_row = { 'participant_id': participant_id, 'sample_id': sample.sample_id, } if has_dbgap_submission: sample_row['dbgap_sample_id'] = airtable_metadata.get('dbgap_sample_id', '') - if include_metadata: - # TODO individual/family, currently not in variant + if include_family_sample_metadata: sample_row.update({ 'data_type': sample.sample_type, 'date_data_generation': sample.loaded_date.strftime('%Y-%m-%d'), From 82d10f05476e514303c9f10190fdc6dedaf7498a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 12:29:19 -0400 Subject: [PATCH 378/736] clean up --- seqr/views/apis/report_api.py | 2 +- seqr/views/apis/summary_data_api.py | 1 - seqr/views/utils/anvil_metadata_utils.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 70d02cca9f..87fb635e19 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -848,7 +848,7 @@ def _add_row(row, family_id, row_type): parse_anvil_metadata( projects, user=request.user, add_row=_add_row, omit_airtable=True, include_metadata=True, - include_family_name_display=True, include_family_sample_metadata=True, include_no_individual_families=True) + include_family_sample_metadata=True, include_no_individual_families=True) for family_id, f in families_by_id.items(): individuals_by_id = family_individuals[family_id] diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index c2fc6ff734..c04136c4fb 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -315,7 +315,6 @@ def _add_row(row, family_id, row_type): parse_anvil_metadata( 
projects, request.user, _add_row, max_loaded_date=request.GET.get('loadedBefore'), include_metadata=True, - include_family_name_display=True, include_family_sample_metadata=True, omit_airtable=not include_airtable, mme_value=Value('Yes'), diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 2e3851bd76..c2243a7a55 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -135,7 +135,7 @@ def _get_family_metadata(family_filter, family_fields, include_family_name_displ Value('\t'), Value(' '), ), analysisStatus=F('analysis_status'), - **(FAMILY_NAME_DISPLAY_VALUES if include_family_name_display else {}), + **(FAMILY_NAME_DISPLAY_VALUES if include_family_name_display or include_family_sample_metadata else {}), **{k: v['value'] for k, v in (family_fields or {}).items()} ) From bb847253daa0a16de27cd7862dfcb37465652dca Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 27 Jun 2024 12:30:59 -0400 Subject: [PATCH 379/736] conditionally set static root --- settings.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/settings.py b/settings.py index 430c9082a1..5598d9dadd 100644 --- a/settings.py +++ b/settings.py @@ -127,18 +127,6 @@ USE_L10N = True USE_TZ = True -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/4.2/howto/static-files/ -STATIC_URL = '/static/' -STATICFILES_DIRS = [ - os.path.join(BASE_DIR, 'static'), - 'ui/dist', -] -STATICFILES_FINDERS = ( - 'django.contrib.staticfiles.finders.FileSystemFinder', - 'django.contrib.staticfiles.finders.AppDirectoriesFinder', -) - # If specified, store data in the named GCS bucket and use the gcloud storage backend. # Else, fall back to a path on the local filesystem. 
GCS_MEDIA_ROOT_BUCKET = os.environ.get('GCS_MEDIA_ROOT_BUCKET') @@ -264,7 +252,7 @@ 'http://localhost:3000', 'http://localhost:8000', ) - # TODO: will docker build fail if STATICFILES_DIRS always contains both? + # TODO: ? # the collectstatic step in docker build runs without env variables set, and uncommenting these lines breaks the docker build # STATICFILES_DIRS.append(STATIC_ROOT) # STATIC_ROOT = None @@ -276,6 +264,22 @@ HIJACK_LOGIN_REDIRECT_URL = '/' TEMPLATE_DIRS.append('ui') + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/4.2/howto/static-files/ +STATIC_URL = '/static/' +STATICFILES_DIRS = ['ui/dist'] +if DEBUG: + STATICFILES_DIRS.append(os.path.join(BASE_DIR, 'static')) +else: + STATIC_ROOT = os.path.join(BASE_DIR, 'static') + +STATICFILES_FINDERS = ( + 'django.contrib.staticfiles.finders.FileSystemFinder', + 'django.contrib.staticfiles.finders.AppDirectoriesFinder', +) + + TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', From 30ee8df2972b3cd4a45609365fa7de9ef7c51b9a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 12:32:04 -0400 Subject: [PATCH 380/736] final include_metadata cleanup --- seqr/views/apis/report_api.py | 4 +--- seqr/views/apis/summary_data_api.py | 1 - seqr/views/utils/anvil_metadata_utils.py | 8 ++++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 87fb635e19..6a6948a0fb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -847,8 +847,7 @@ def _add_row(row, family_id, row_type): family['inheritance_models'].update({v['variant_inheritance'] for v in row}) parse_anvil_metadata( - projects, user=request.user, add_row=_add_row, omit_airtable=True, include_metadata=True, - include_family_sample_metadata=True, include_no_individual_families=True) + projects, user=request.user, add_row=_add_row, omit_airtable=True, include_family_sample_metadata=True, 
include_no_individual_families=True) for family_id, f in families_by_id.items(): individuals_by_id = family_individuals[family_id] @@ -947,7 +946,6 @@ def _add_row(row, family_id, row_type): variant_json_fields=['clinvar', 'variantId'], variant_attr_fields=['tags'], mme_value=ArrayAgg('matchmakersubmissiongenes__saved_variant__saved_variant_json__variantId'), - include_metadata=True, include_family_name_display=True, include_mondo=True, omit_airtable=True, diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index c04136c4fb..1d988ee506 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -314,7 +314,6 @@ def _add_row(row, family_id, row_type): parse_anvil_metadata( projects, request.user, _add_row, max_loaded_date=request.GET.get('loadedBefore'), - include_metadata=True, include_family_sample_metadata=True, omit_airtable=not include_airtable, mme_value=Value('Yes'), diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index c2243a7a55..2187c8e196 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -163,7 +163,7 @@ def parse_anvil_metadata( individual_samples: dict[Individual, Sample] = None, individual_data_types: dict[str, Iterable[str]] = None, airtable_fields: Iterable[str] = None, mme_value: Aggregate = None, include_svs: bool = True, variant_json_fields: Iterable[str] = None, variant_attr_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, - include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False, include_family_name_display: bool = False, include_family_sample_metadata: bool = False, + include_no_individual_families: bool = False, omit_airtable: bool = False, include_family_name_display: bool = False, include_family_sample_metadata: bool = False, include_discovery_sample_id: bool = False, 
include_mondo: bool = False, include_parent_mnvs: bool = False, proband_only_variants: bool = False): @@ -185,7 +185,7 @@ def parse_anvil_metadata( sample_ids.add(sample.sample_id) saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family( - list(family_data_by_id.keys()), include_metadata, include_svs, variant_json_fields, variant_attr_fields, + list(family_data_by_id.keys()), bool(mme_value), include_svs, variant_json_fields, variant_attr_fields, ) condition_map = _get_condition_map(family_data_by_id.values()) @@ -372,7 +372,7 @@ def _get_parsed_saved_discovery_variants_by_family( } if include_metadata: parsed_variant.update({ - 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), # TODO individual/variant, currently not in family + 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), }) variants.append(parsed_variant) @@ -380,7 +380,7 @@ def _get_parsed_saved_discovery_variants_by_family( saved_variants_by_family = defaultdict(list) for row in variants: - gene_id = row['gene_id'] if include_metadata else row.pop('gene_id') # TODO individual only, currently not in family, add to variant? 
+ gene_id = row['gene_id'] if include_metadata else row.pop('gene_id') row[GENE_COLUMN] = genes_by_id.get(gene_id, {}).get('geneSymbol') family_id = row.pop('family_id') saved_variants_by_family[family_id].append(row) From 055bf49c070a697abbc3f4abde0e24c382857ac3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 12:33:47 -0400 Subject: [PATCH 381/736] remove unused field from inidivudal metadata --- seqr/views/apis/summary_data_api.py | 2 +- seqr/views/apis/summary_data_api_tests.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 1d988ee506..df71a4837f 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -286,7 +286,7 @@ def _add_row(row, family_id, row_type): elif row_type == DISCOVERY_ROW_TYPE: for i, discovery_row in enumerate(row): participant_id = discovery_row.pop('participant_id') - parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items()} + parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items() if k != 'allele_balance_or_heteroplasmy_percentage'} parsed_row['num_saved_variants'] = len(row) rows_by_subject_family_id[(participant_id, family_id)].update(parsed_row) elif row_type == SUBJECT_ROW_TYPE: diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index eac67d64f8..835c9ac946 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -98,8 +98,6 @@ 'svType-1': None, 'sv_name-1': None, 'end-1': None, - 'allele_balance_or_heteroplasmy_percentage-1': None, # unused - 'allele_balance_or_heteroplasmy_percentage-2': None, 'notes-1': None, 'notes-2': None, 'phenotype_contribution-1': 'Partial', @@ -159,7 +157,6 @@ 'ref-1': 'TC', 'zygosity-1': 'Heterozygous', 'variant_reference_assembly-1': 'GRCh38', - 'allele_balance_or_heteroplasmy_percentage-1': None, 'gene_of_interest-1': 
None, 'gene_id-1': None, 'hgvsc-1': '', From 2eebf81e9ca086d7ac9ddb19a479660489649f5f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 12:38:06 -0400 Subject: [PATCH 382/736] remove unused consanguinity --- seqr/views/apis/report_api_tests.py | 1 - seqr/views/utils/anvil_metadata_utils.py | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 5b74a65121..0bc533fb46 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -499,7 +499,6 @@ 'allele_balance_or_heteroplasmy_percentage': None, 'clinvar': None, 'condition_id': None, - 'consanguinity': 'Unknown', # unused 'end': None, 'hgvsc': '', 'hgvsp': '', diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 2187c8e196..40aca9ca8a 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -208,12 +208,13 @@ def parse_anvil_metadata( subject_family_row = {k: family_subject_row.pop(k) for k in FAMILY_INDIVIDUAL_FIELDS} family_row = { 'family_id': subject_family_row['family_id'], - 'consanguinity': next(( - 'Present' if individual.consanguinity else 'None suspected' - for individual in family_individuals if individual.consanguinity is not None - ), 'Unknown'), **family_subject_row, } + if not include_family_name_display: + family_row['consanguinity'] = next(( + 'Present' if individual.consanguinity else 'None suspected' + for individual in family_individuals if individual.consanguinity is not None + ), 'Unknown') add_row(family_row, family_id, FAMILY_ROW_TYPE) for individual in family_individuals: From a1cc0013b7df579a4b28d7c8eaf57d0dfeb5d246 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 12:46:19 -0400 Subject: [PATCH 383/736] include gene_id in variant download --- seqr/views/apis/report_api_tests.py | 2 +- 
ui/pages/SummaryData/components/IndividualMetadata.jsx | 4 ++-- ui/shared/utils/constants.js | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 0bc533fb46..d532ba142f 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1264,7 +1264,7 @@ def test_variant_metadata(self): 'familyGuid': 'F000002_2', 'family_id': '2', 'gene_of_interest': 'RP11', - 'gene_id': 'ENSG00000135953', # unused + 'gene_id': 'ENSG00000135953', 'gene_known_for_phenotype': 'Known', 'genetic_findings_id': 'HG00731_1_248367227', 'known_condition_name': 'mitochondrial disease', diff --git a/ui/pages/SummaryData/components/IndividualMetadata.jsx b/ui/pages/SummaryData/components/IndividualMetadata.jsx index 6da7ac13c9..91c7610390 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.jsx +++ b/ui/pages/SummaryData/components/IndividualMetadata.jsx @@ -65,9 +65,9 @@ const getColumns = (data) => { const hasAirtable = data && data[0] && data[0][AIRTABLE_DBGAP_SUBMISSION_FIELD] return [...CORE_COLUMNS, ...(hasAirtable ? AIRTABLE_COLUMNS : [])].concat( ...[...Array(maxSavedVariants).keys()].map(i => VARIANT_METADATA_COLUMNS.map( - ({ name, format, fieldName, ...col }) => ({ + ({ name, format, fieldName, secondaryExportColumn, ...col }) => ({ name: `${name}-${i + 1}`, - secondaryExportColumn: name === 'gene_of_interest' ? `gene_id-${i + 1}` : null, + secondaryExportColumn: secondaryExportColumn && `${secondaryExportColumn}-${i + 1}`, format: format ? 
row => format({ [fieldName]: row[`${fieldName}-${i + 1}`] }) : null, ...col, }), diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 5ddcac1e36..6f7c3faf5c 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1861,7 +1861,7 @@ export const VARIANT_METADATA_COLUMNS = [ { name: 'end' }, { name: 'ref' }, { name: 'alt' }, - { name: 'gene_of_interest' }, + { name: 'gene_of_interest', secondaryExportColumn: 'gene_id' }, { name: 'seqr_chosen_consequence' }, { name: 'transcript' }, { name: 'hgvsc' }, From f11c73b7eaff9d92d25d2b97d5596c04147809ab Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 12:50:08 -0400 Subject: [PATCH 384/736] keep analysis status for variant metadata --- seqr/views/apis/report_api_tests.py | 1 + seqr/views/utils/anvil_metadata_utils.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index d532ba142f..ad185e4d6c 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -497,6 +497,7 @@ 'MME': False, 'additional_family_members_with_variant': '', 'allele_balance_or_heteroplasmy_percentage': None, + 'analysisStatus': 'Q', 'clinvar': None, 'condition_id': None, 'end': None, diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 40aca9ca8a..77f0f3b4dc 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -125,6 +125,7 @@ def _get_family_metadata(family_filter, family_fields, include_family_name_displ 'value': ArrayAgg('analysisgroup__name', distinct=True, filter=Q(analysisgroup__isnull=False)), 'format': lambda f: '; '.join(f['analysis_groups']), }} if include_family_sample_metadata else family_fields + include_family_name_display = include_family_name_display or include_family_sample_metadata family_data = 
Family.objects.filter(**family_filter).distinct().order_by('id').values( 'id', 'family_id', 'post_discovery_omim_numbers', *(['post_discovery_mondo_id'] if include_mondo else []), @@ -135,14 +136,14 @@ def _get_family_metadata(family_filter, family_fields, include_family_name_displ Value('\t'), Value(' '), ), analysisStatus=F('analysis_status'), - **(FAMILY_NAME_DISPLAY_VALUES if include_family_name_display or include_family_sample_metadata else {}), + **(FAMILY_NAME_DISPLAY_VALUES if include_family_name_display else {}), **{k: v['value'] for k, v in (family_fields or {}).items()} ) family_data_by_id = {} for f in family_data: family_id = f.pop('id') - analysis_status = f['analysisStatus'] if include_family_sample_metadata else f.pop('analysisStatus') + analysis_status = f['analysisStatus'] if include_family_name_display else f.pop('analysisStatus') solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(analysis_status, Individual.UNSOLVED) f.update({ 'solve_status': Individual.SOLVE_STATUS_LOOKUP[solve_status], From 21100ed085e688d7997e7244bba508a0c42be864 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 13:41:19 -0400 Subject: [PATCH 385/736] update js tests --- .../components/IndividualMetadata.test.js | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/ui/pages/SummaryData/components/IndividualMetadata.test.js b/ui/pages/SummaryData/components/IndividualMetadata.test.js index abb8c850ae..44c5ad173d 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.test.js +++ b/ui/pages/SummaryData/components/IndividualMetadata.test.js @@ -14,7 +14,6 @@ const DATA = [ projectGuid: 'R0003_test', num_saved_variants: 2, solve_status: 'Tier 1', - sample_id: 'NA20889', 'gene_known_for_phenotype-1': 'Candidate', 'gene_known_for_phenotype-2': 'Candidate', 'variant_inheritance-1': 'unknown', @@ -31,6 +30,7 @@ const DATA = [ 'sv_name-2': 'DEL:chr12:49045487-49045898', 'chrom-2': '12', 'pos-2': '49045487', + 'end-2': '49045898', 
maternal_id: '', paternal_id: '', maternal_guid: '', @@ -38,6 +38,7 @@ const DATA = [ 'hgvsp-1': 'c.1586-17C>G', internal_project_id: 'Test Reprocessed Project', 'pos-1': 248367227, + 'end-1': null, data_type: 'WES', familyGuid: 'F000012_12', family_history: 'Yes', @@ -58,10 +59,13 @@ const DATA = [ disorders: null, family_id: '12', displayName: '12', - MME: 'Y', + MME: 'Yes', participant_id: 'NA20889', individual_guid: 'I000017_na20889', proband_relationship: 'Self', + condition_id: 'OMIM:616126', + condition_inheritance: 'Autosomal recessive', + known_condition_name: 'Immunodeficiency 38', 'phenotype_contribution-1': 'Partial', 'phenotype_contribution-2': 'Full', 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', @@ -76,26 +80,26 @@ test('IndividualMetadata render and export', () => { const sampleMetadata = mount() const exportConfig = sampleMetadata.find('DataTable').instance().exportConfig(DATA)[0] expect(exportConfig.headers).toEqual([ - 'project_id', 'projectGuid', 'family_id', 'familyGuid', 'participant_id', 'individual_guid', 'pmid_id', 'paternal_id', - 'paternal_guid', 'maternal_id', 'maternal_guid', 'proband_relationship', 'sex', 'ancestry', - 'condition_id', 'known_condition_name', 'disorders', 'affected_status', 'hpo_present', 'hpo_absent', - 'phenotype_description', 'analysis_groups', 'analysis_status', 'solve_status', 'MME', 'data_type', 'date_data_generation', + 'project_id', 'projectGuid', 'family_id', 'familyGuid', 'participant_id', 'individual_guid', 'paternal_id', + 'paternal_guid', 'maternal_id', 'maternal_guid', 'proband_relationship', 'sex', 'ancestry', 'affected_status', + 'hpo_present', 'hpo_absent', 'MME', 'pmid_id', 'condition_id', 'known_condition_name', 'condition_inheritance', 'disorders', + 'phenotype_description', 'analysis_groups', 'analysis_status', 'solve_status', 'data_type', 'date_data_generation', 'filter_flags', 'consanguinity', 'family_history', 'genetic_findings_id-1', 'variant_reference_assembly-1', - 'chrom-1', 
'pos-1', 'ref-1', 'alt-1', 'gene_of_interest-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1', + 'chrom-1', 'pos-1', 'end-1', 'ref-1', 'alt-1', 'gene_of_interest-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1', 'hgvsc-1', 'hgvsp-1', 'zygosity-1', 'sv_name-1', 'sv_type-1', 'variant_inheritance-1', 'gene_known_for_phenotype-1', 'phenotype_contribution-1', 'partial_contribution_explained-1', 'notes-1', 'ClinGen_allele_ID-1', - 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2', + 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2', 'end-2', 'ref-2', 'alt-2', 'gene_of_interest-2', 'gene_id-2', 'seqr_chosen_consequence-2', 'transcript-2', 'hgvsc-2', 'hgvsp-2', 'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', 'phenotype_contribution-2', 'partial_contribution_explained-2', 'notes-2', 'ClinGen_allele_ID-2']) expect(exportConfig.processRow(DATA[0])).toEqual([ - 'Test Reprocessed Project', 'R0003_test', '12', 'F000012_12', 'NA20889', 'I000017_na20889', null, '', '', '', '', - 'Self', 'Female', 'Ashkenazi Jewish', undefined, undefined, null, 'Affected', - 'HP:0011675 (Arrhythmia)|HP:0001509 ()', '', null, undefined, 'Waiting for data', 'Tier 1', 'Y', 'WES', '2017-02-05', '', - undefined, 'Yes', 'NA20889_1_248367227', undefined, '1', 248367227, 'TC', 'T', 'OR4G11P', 'ENSG00000240361', - 'intron_variant', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', undefined, undefined, - 'unknown', 'Candidate', 'Partial', 'HP:0000501|HP:0000365', undefined, 'CA1501729', 'NA20889_1_249045487', undefined, '12', '49045487', undefined, - undefined, undefined, undefined, undefined, + 'Test Reprocessed Project', 'R0003_test', '12', 'F000012_12', 'NA20889', 'I000017_na20889', '', '', '', '', + 'Self', 'Female', 'Ashkenazi Jewish', 'Affected', 'HP:0011675 (Arrhythmia)|HP:0001509 ()', '', 'Yes', null, + 'OMIM:616126', 'Immunodeficiency 38', 'Autosomal recessive', 
null, null, undefined, 'Waiting for data', 'Tier 1', + 'WES', '2017-02-05', '', undefined, 'Yes', 'NA20889_1_248367227', undefined, '1', 248367227, null, 'TC', 'T', + 'OR4G11P', 'ENSG00000240361', 'intron_variant', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', undefined, undefined, + 'unknown', 'Candidate', 'Partial', 'HP:0000501|HP:0000365', undefined, 'CA1501729', 'NA20889_1_249045487', undefined, + '12', '49045487', '49045898', undefined, undefined, undefined, undefined, undefined, undefined, undefined, undefined, 'Heterozygous', 'DEL:chr12:49045487-49045898', 'Deletion', 'unknown', 'Candidate', 'Full', '', undefined, null]) }) From 073da911ddcfd420ca043dd4c32e89c552b55318 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 27 Jun 2024 14:18:41 -0400 Subject: [PATCH 386/736] update test fixture --- .../families/F000002_2.ht/.README.txt.crc | Bin 12 -> 12 bytes .../families/F000002_2.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../SNV_INDEL/families/F000002_2.ht/README.txt | 4 ++-- .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../families/F000002_2.ht/metadata.json.gz | Bin 350 -> 350 bytes .../F000002_2.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../families/F000002_2.ht/rows/metadata.json.gz | Bin 613 -> 613 bytes ...t-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc | Bin 0 -> 12 bytes ...t-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.crc | Bin 12 -> 0 bytes .../part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce | Bin 0 -> 94 bytes .../part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da | Bin 101 -> 0 bytes .../projects/R0001_1kg.ht/.README.txt.crc | Bin 12 -> 12 bytes .../SNV_INDEL/projects/R0001_1kg.ht/README.txt | 2 +- .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../R0001_1kg.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../projects/R0001_1kg.ht/rows/metadata.json.gz | Bin 625 -> 624 bytes ...t-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.crc | Bin 12 -> 
0 bytes ...t-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc | Bin 0 -> 12 bytes .../part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152 | Bin 102 -> 0 bytes .../part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f | Bin 0 -> 95 bytes hail_search/test_search.py | 3 --- 27 files changed, 3 insertions(+), 6 deletions(-) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/{part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx => part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/{part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx => part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/{part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx => part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx}/index (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/{part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx => part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx}/metadata.json.gz (100%) create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/{part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx => part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/{part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx => 
part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/{part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx => part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx}/index (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/{part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx => part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx}/metadata.json.gz (100%) delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152 create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/.README.txt.crc index c4ef09461d00895cbc124b467c0d4f9916097109..2abb07dfbc0fa2ce089add463ecfb471acea65b3 100644 GIT binary patch literal 12 TcmYc;N@ieSU}89^X89KY5@iEe literal 12 TcmYc;N@ieSU}E^B?mQI$64(PQ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/.metadata.json.gz.crc index 1bb19a2a334f634e5c3930d000aadb311cb1b3bf..8e6ff63cdd4fafe21b64f08f4027a003aca3dc7c 100644 GIT binary patch literal 12 TcmYc;N@ieSU}9MQW{nj96mkQ! 
literal 12 TcmYc;N@ieSU}BgOe`_HC6G8*2 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/README.txt index 5958e8574d..b41496ec38 100644 --- a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/README.txt +++ b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. - Written with version 0.2.126-ee77707f4fab - Created at 2024/01/24 11:38:19 \ No newline at end of file + Written with version 0.2.128-eead8100a1c1 + Created at 2024/06/27 14:14:27 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx/.index.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.index.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx/index 
b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/index similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx/index rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/index diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.idx/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/metadata.json.gz index fc4e99ad6d91c3df9dc5abfacf39447ade356150..eae96f9ae97f56323127e98c9ec99dab2940f8a4 100644 GIT binary patch literal 350 zcmV-k0ipgMiwFP!000000F9E(Zo)7Sgx|%dR&o&uQbE48K}A)2pn!ToQDmG2teV)e zouVl6yLS^3{!YC_o7rzB*|lr3l?t+7uukfNu;=k8o=~#qQe8wc9K?eto;(5Yd=d?Z zJQ5M1#KIOV6arjkL1l*fJ;E0i5zjXm?<)h?Sv4>9PU#I-Z0T(&eC_zbAR%+dUaaNS z8g*IhQra~dD?4elG&%R+*b;n?1^NROTPrO}N4Zjc%1k;okD@dHQ7YYzSw)=M=^bQWp7IJrku_@ZMI?w=7%%{9AD^z(FLYB!fJAw zye$iUXg0F=o+drAG$7igo;TU^0abnczpsVSWVrRx;}K>;m)$>f0rS3huKm|ErxH|{|(>i~98Cq-j1M(|H{e=Pm0C$q4HUIzs literal 350 zcmV-k0ipgMiwFP!000000F9DgZ-Ous#lK6RwxlzPi!6O}n3-jJaBB8q2&r(z1RGPMj(vXHVV}+OVZMy^OV diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/.metadata.json.gz.crc index 
466a4ce583c6f5195849c64caf640dc4564f63d8..4a03ffff0cb622df3b425a67700ba3b3f6bddf60 100644 GIT binary patch literal 16 XcmYc;N@ieSU}9J$n%q_X(2EBEB60+k literal 16 XcmYc;N@ieSU}CtncbC+T!%nLJDVzok diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/metadata.json.gz index d3078bac8bf2d4e6a8302fb862909441bf7b89f6..02f9d0c657ceeb25b4798c2aca2d39a7885d0174 100644 GIT binary patch literal 613 zcmV-r0-F6FiwFP!000000NqwyZ`v>v{V#snw9q9GNS8N4DMG5I4yrw@Lde)R;5BuS zW2Q2c|Gw*dkpw1#*l&T4ssLtzfiUuE9MA;jTJ4xVWbOP0#A2z%+F(Lb zwYRo$BD9DlB2%e*-P9^(BQR8o?jOt4g%yfOgG&6907$Jwt)xD1^3BgXTf@K~-$pSt z>zn%$ojeq)bdee%zX~A@33ulI!(=wv^t_K1!+aPf3b{VAn}e>fl30~lFscaE_*8U8 z*uNmN+Jzyg*5==h1E+f(977gi8xiqZN`SPVkU=A9Wx%=kjR}K(zc)B7`;lm#m0nnf z!hk-vD^v4B5yIH5rD0lPnfnw(_f&5Xzqt(mx90Kt{S)& z`n%z%V=hMgH#BW(#bKC$u9#3MV@-?d+KlwP|Bg~>LK=250xmUc#p;Api_JSRAbyyV zMqkl4uQOCOPk}sdq0F&M{QhCtdfBH&%#k-YT|Pzuixodl>Ovn@JJ9gnsq(h&w(85B zdQQq;guF-d;-z{V#snw4@^>T$VRNDMG5I4yrw@Lde8lz-#It z$4q4?|Gl4cF(goFRO?H5K*{-T=h!E*_e9VqAU>H02G78zKd)yS2;0OWi$~ZI|AE9w zxJ~h$#xcZ@k~xOLgh8kiiNCKp-0wEPh9-bnpdpNW83ifDNWtU#vzNdSbByk2scIR4=mt*t@ek8a|G z>gVgb3LW2QVI;#XH+^K^~IK%?Ab|Kz;mJ&(9d)q=CdT#a zPRc7nKB9SX(cX9>F>M)w$41Q0kSV53%1=IC=k}B+=g%$S*d?-AOQ+vpzGBEKS&u#ZwP7gh7 z?ixs7fJtM5TTVc*|I&wxFy1n>c1{eHgTY36ghn#0g(@ZWq%3;@Beqb=CkX%m?s+f3 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc new file mode 100644 index 0000000000000000000000000000000000000000..f234dac53f5444474b1f4e765ec640880a951168 GIT binary patch literal 12 TcmYc;N@ieSU}E6o+4%_o5I_R} literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.crc 
b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da.crc deleted file mode 100644 index 20862b20944194fa669251c3c138ba0735c079de..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}8w0q<#kg5&Q!; diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce new file mode 100644 index 0000000000000000000000000000000000000000..dbd63db8d859c18c8bc957085f89ba7d6979f986 GIT binary patch literal 94 zcmeZdU|_HTVvVi(e-&&VGBN<6`J6*5T$mUg8QmGB!UBVXLW&aek~30+{Q@j4^(+la l@FuArAvD&>#>18wNCq0RaC|6MO&w literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da b/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-a30c1a83-2851-4434-9ca6-ba437ff4a1da deleted file mode 100644 index da7510a14b461c8f22e6aac42fcbe0d91d0abe93..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 101 zcmWGwU|?_tVvVi(e-)f1nHU%u7|rJ#TH(UP=*Z~KC>0hM928QNn3tT98tfNfX{l#v sP?8U#Oqm%B7#JGt1wd>{au2{e!a05%2^)Bpeg diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/.README.txt.crc index b1ba5f279a8c5e7d088d9f99c5c16dcfcf590841..eea7b98bfaabb9f3d3336072da7508278d7ab3f9 100644 GIT binary patch literal 12 TcmYc;N@ieSU}CV?U2zWp5?=#^ literal 12 TcmYc;N@ieSU}AV$XPycG6JY~7 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/README.txt index ad57efea23..2913b8406f 100644 --- a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/README.txt +++ 
b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. Written with version 0.2.128-eead8100a1c1 - Created at 2024/04/03 17:41:01 \ No newline at end of file + Created at 2024/06/27 14:07:54 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx/.index.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.index.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/index similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx/index rename to 
hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/index diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.idx/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc index 194d29c5040cd3326ec2c8764444c588648b053e..d00b64c90d52a5dce573f5fee9ced1b44ac8713c 100644 GIT binary patch literal 16 XcmYc;N@ieSU}9)~EEvA{q9YFgBwqx| literal 16 XcmYc;N@ieSU}6xTc|PU+%Hvf4CEx}3 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/metadata.json.gz index 9921c5c42d9770227aa5c5cb974f179f31ccaac9..42b4aee2d2405270584b4cd601e858e55110fbfd 100644 GIT binary patch literal 624 zcmV-$0+0P4iwFP!000000Ns{dZ`v>vhX0GNHZAG+DzMxUN>Qb1>Y&=iDukT)7`z$> zIc6$D`R_YU$R|u1)p}Ddz&Sq0=Y8!5@PR100pgK6#oz_l?EB4p3t@*iWcdtx;yn== zg}WG^sgxigCJUSjBL<;OB;Fxk;c>qOHZCSG3v38yK23ZY!`$dyXb$OeUM&NQVoMUecjI^fvr#yAxa3Kxo*gKt|8zJ{gP)3VdW%gO^KmF!q zjhvD1O>SjG?fB*)k4_(tqhzs2tg&B5V>L_n_Wi?jHePoJ9}C4s4K*cu~{xH*l`ZJaWwZ6zug)7<&;ChwUDfm;-iUY)Kg0eY;Z1qV?zJ> zy4yc3`w<(KmR?wg&VX*3m5F_!31MMuc(}`BDVrMP12t=mUtN7p(`{4&0 zE_7^`g1uzS^?MG|U*oOJ#ToAnkxrb5^B7E_genyodaSPWNRqQOmR2f8YCKs3m!ey! 
zI?>cn^G*Q7uc@N)D{}KXpwfOC)OmzbVwd=90L#WX_n_RJ`YH*ri*Knmx5C~7@M>Tm+?E>#yKFypEIgf;72>fZ2f`623N4gvP%XIGKR710JY=* z6R+Ltb_T8X02wYWA}C;lNmGJHPC&8$vW`%p+%ZHu6^72Ch)rw@8%eAeYK*p>lH?Cn KEJC$J2><|BVkm$B literal 625 zcmV-%0*?J3iwFP!000000Ns{RZ`v>vh5w76HZAEOB!uM+C`Cxs)Iqg}RS22*2D};v zIc6$D`R}_Luk;OCYN$^BO z9PSeQOoade37O+k7&8b>Aqft}4v+gSuwl7?RbV7s`7|2P1ZG<8LVd`#3u;mrOU+kC zo^;QzEX7mRFuxGhzkuW3C4hy+lRd#2fE3NJ{ zZz$W2{z`i%?n-k*99CFc?xF}(Iv97=6uMi?bES*X^^OiI%~ Ljx*ynMF{`^UaKHL diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152.crc deleted file mode 100644 index 54a4406d9c561da2e67b002894e377d5aeb560d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}6w-37ZK357h#H diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc new file mode 100644 index 0000000000000000000000000000000000000000..7078a48348cdfe6d46c22cd1b5d2141a0235263c GIT binary patch literal 12 TcmYc;N@ieSU}CV}b<79=5@-V~ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152 b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-7af5daf7-4c5e-40db-ba9f-3248f8540152 deleted file mode 100644 index 131c6264f3639eab1d85802b48fbb00567ae9ec8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 102 zcmWG!U|?_oVvVi(e-&I5nHU%u7|rJ#TH(UP=*Z~KC>0hM928QNn3tT98tfNfX{l#v tP?8U#Oc|LO3>X+1>;*&^*nkEwFtIQsv+Aqc8}cylg7`oHHWX+g0{}==65s#; diff --git 
a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f new file mode 100644 index 0000000000000000000000000000000000000000..170603a856d475e89eb2ad97b5490d71befc61d9 GIT binary patch literal 95 zcmeZbU|_HXVvVi(e-&(BGBN<6`J6*5T$mUg8QmGB!UBVXLW&aek~30+{Q@j4^(+la m@ Date: Thu, 27 Jun 2024 14:54:31 -0400 Subject: [PATCH 387/736] fix formatting for saved variant download --- ui/shared/components/panel/variants/selectors.js | 5 +++-- ui/shared/utils/constants.js | 11 ++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index a58de15314..651b9975a0 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -309,10 +309,11 @@ export const getSavedVariantExportConfig = createSelector( getAnalysisGroupsByGuid, getVariantTagsByGuid, getVariantNotesByGuid, + getGenesById, (state, props) => props.project, getSavedVariantTableState, (state, props) => props.match.params, - (analysisGroupsByGuid, tagsByGuid, notesByGuid, project, tableState, params) => { + (analysisGroupsByGuid, tagsByGuid, notesByGuid, genesById, project, tableState, params) => { if (project && project.isDemo && !project.allUserDemo) { // Do not allow downloads for demo projects return null @@ -329,7 +330,7 @@ export const getSavedVariantExportConfig = createSelector( getHeaders: state => getSavedVariantExportHeaders(state, { project, match: { params } }), processRow: variant => ([ ...VARIANT_EXPORT_DATA.map(config => ( - config.getVal ? config.getVal(variant, tagsByGuid, notesByGuid) : variant[config.header])), + config.getVal ? 
config.getVal(variant, tagsByGuid, notesByGuid, genesById) : variant[config.header])), ...Object.values(variant.genotypes).reduce( (acc, { sampleId, numAlt, gq, ab }) => ([...acc, sampleId, numAlt, gq, ab]), [], ), diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 882b788c1f..9ce1b5f479 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1531,14 +1531,19 @@ const getPopAf = population => (variant) => { return (populationData || {}).af } +const getVariantGene = (variant, tagsByGuid, notesByGuid, genesById) => { + const { geneId } = getVariantMainTranscript(variant) + return genesById[geneId]?.geneSymbol || geneId +} + export const VARIANT_EXPORT_DATA = [ { header: 'chrom' }, { header: 'pos' }, { header: 'ref' }, { header: 'alt' }, - { header: 'gene', getVal: variant => getVariantMainTranscript(variant).geneSymbol }, + { header: 'gene', getVal: getVariantGene }, { header: 'worst_consequence', getVal: variant => getVariantMainTranscript(variant).majorConsequence }, - { header: 'callset_freq', getVal: getPopAf('callset') }, + { header: 'callset_freq', getVal: variant => getPopAf('callset')(variant) || getPopAf('seqr')(variant) }, { header: 'exac_freq', getVal: getPopAf('exac') }, { header: 'gnomad_genomes_freq', getVal: getPopAf('gnomad_genomes') }, { header: 'gnomad_exomes_freq', getVal: getPopAf('gnomad_exomes') }, @@ -1554,7 +1559,7 @@ export const VARIANT_EXPORT_DATA = [ { header: 'rsid', getVal: variant => variant.rsid }, { header: 'hgvsc', getVal: variant => getVariantMainTranscript(variant).hgvsc }, { header: 'hgvsp', getVal: variant => getVariantMainTranscript(variant).hgvsp }, - { header: 'clinvar_clinical_significance', getVal: variant => (variant.clinvar || {}).clinicalSignificance }, + { header: 'clinvar_clinical_significance', getVal: variant => (variant.clinvar || {}).clinicalSignificance || (variant.clinvar || {}).pathogenicity }, { header: 'clinvar_gold_stars', getVal: variant => (variant.clinvar 
|| {}).goldStars }, { header: 'filter', getVal: variant => variant.genotypeFilters }, { header: 'project' }, From c1fdc7844e783eb971acee50d071fc4c625f9ee7 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 27 Jun 2024 16:22:01 -0400 Subject: [PATCH 388/736] fix individualrow --- ui/pages/Project/components/FamilyTable/IndividualRow.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index 8717ebdde3..0d9c035f82 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -169,7 +169,7 @@ const DataDetails = React.memo(({ loadedSamples, individual, mmeSubmission }) => /> ) : )} - {loadedSamples.some(sample => sample.rnaSeqTypes?.length > 0) && ( + {loadedSamples.some(sample => sample.isActive && (sample.rnaSeqTypes.includes('Expression Outlier') || sample.rnaSeqTypes.includes('Splice Outlier'))) && (
Date: Thu, 27 Jun 2024 16:35:29 -0400 Subject: [PATCH 389/736] allow anvil users to create manual variants --- ui/pages/Project/components/CreateVariantButton.jsx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ui/pages/Project/components/CreateVariantButton.jsx b/ui/pages/Project/components/CreateVariantButton.jsx index 197f2bc491..b0208c856e 100644 --- a/ui/pages/Project/components/CreateVariantButton.jsx +++ b/ui/pages/Project/components/CreateVariantButton.jsx @@ -212,10 +212,11 @@ const SV_FIELDS = [ }, ].map(formatField) -const BaseCreateVariantButton = React.memo(({ variantType, family, user, ...props }) => ( - user.isAnalyst ? ( +const BaseCreateVariantButton = React.memo(({ variantType, family, user, project, ...props }) => ( + (project.isAnalystProject ? user.isAnalyst : project.canEdit) ? ( ({ user: getUser(state), - initialValues: getCurrentProject(state), + project: getCurrentProject(state), }) const mapDispatchToProps = (dispatch, ownProps) => ({ From af4654208f3178c884dcebb7226cff0d80570ae5 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 27 Jun 2024 16:43:43 -0400 Subject: [PATCH 390/736] isactive --- ui/shared/components/panel/sample.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/sample.jsx b/ui/shared/components/panel/sample.jsx index f19d078a0c..c682c892b4 100644 --- a/ui/shared/components/panel/sample.jsx +++ b/ui/shared/components/panel/sample.jsx @@ -40,7 +40,7 @@ const Sample = React.memo(({ loadedSample, isOutdated, hoverDetails }) => ( 'no data available'} {loadedSample.sampleType && loadedSample.sampleType === SAMPLE_TYPE_RNA && - loadedSample.rnaSeqTypes?.length > 0 && `RNAseq methods: ${loadedSample.rnaSeqTypes.join(', ')}`} + loadedSample.isActive && `RNAseq methods: ${loadedSample.rnaSeqTypes.join(', ')}`}
} position="left center" From 6e0ee86f5ad0cc0f533c4001c45c79be95e7a03c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 28 Jun 2024 11:52:57 -0400 Subject: [PATCH 391/736] add vlm contact db field; --- CHANGELOG.md | 1 + .../0068_project_vlm_contact_email.py | 29 +++++++++++++++++++ seqr/models.py | 5 +++- settings.py | 2 ++ 4 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 seqr/migrations/0068_project_vlm_contact_email.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 984ec30bbb..cf141bec2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Add VLM contact for Projects (REQUIRES DB MIGRATION) ## 6/11/24 * Add "Partial Phenotype Contribution" functional tag (REQUIRES DB MIGRATION) diff --git a/seqr/migrations/0068_project_vlm_contact_email.py b/seqr/migrations/0068_project_vlm_contact_email.py new file mode 100644 index 0000000000..c158184936 --- /dev/null +++ b/seqr/migrations/0068_project_vlm_contact_email.py @@ -0,0 +1,29 @@ +# Generated by Django 3.2.23 on 2024-06-28 15:44 + +from django.db import migrations, models + + +def update_vlm_contact_email(apps, schema_editor): + Project = apps.get_model('seqr', 'Project') + db_alias = schema_editor.connection.alias + + projects = Project.objects.using(db_alias).all() + for project in projects: + project.vlm_contact_email = project.mme_contact_url.replace('mailto:', '').replace('matchmaker', 'vlm') + Project.objects.using(db_alias).bulk_update(projects, ['vlm_contact_email']) + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0067_alter_variantfunctionaldata_functional_data_tag'), + ] + + operations = [ + migrations.AddField( + model_name='project', + name='vlm_contact_email', + field=models.TextField(blank=True, default='vlm@broadinstitute.org', null=True), + ), + migrations.RunPython(update_vlm_contact_email, reverse_code=migrations.RunPython.noop), + ] diff --git a/seqr/models.py b/seqr/models.py index 1cd1f46b0e..770a37cce1 100644 
--- a/seqr/models.py +++ b/seqr/models.py @@ -17,7 +17,8 @@ from seqr.utils.xpos_utils import get_chrom_pos from seqr.views.utils.terra_api_utils import anvil_enabled from reference_data.models import GENOME_VERSION_GRCh37, GENOME_VERSION_CHOICES -from settings import MME_DEFAULT_CONTACT_NAME, MME_DEFAULT_CONTACT_HREF, MME_DEFAULT_CONTACT_INSTITUTION +from settings import MME_DEFAULT_CONTACT_NAME, MME_DEFAULT_CONTACT_HREF, MME_DEFAULT_CONTACT_INSTITUTION, \ + VLM_DEFAULT_CONTACT_EMAIL logger = SeqrLogger(__name__) @@ -196,6 +197,8 @@ class Project(ModelWithGUID): mme_contact_url = models.TextField(null=True, blank=True, default=MME_DEFAULT_CONTACT_HREF) mme_contact_institution = models.TextField(null=True, blank=True, default=MME_DEFAULT_CONTACT_INSTITUTION) + vlm_contact_email = models.TextField(null=True, blank=True, default=VLM_DEFAULT_CONTACT_EMAIL) + has_case_review = models.BooleanField(default=False) enable_hgmd = models.BooleanField(default=False) all_user_demo = models.BooleanField(default=False) diff --git a/settings.py b/settings.py index b7fd626bfa..852d1ddac4 100644 --- a/settings.py +++ b/settings.py @@ -352,6 +352,8 @@ MME_DEFAULT_CONTACT_EMAIL = 'matchmaker@broadinstitute.org' MME_DEFAULT_CONTACT_HREF = 'mailto:{}'.format(MME_DEFAULT_CONTACT_EMAIL) +VLM_DEFAULT_CONTACT_EMAIL = 'vlm@broadinstitute.org' + MME_CONFIG_DIR = os.environ.get('MME_CONFIG_DIR', '') MME_NODES = {} if MME_CONFIG_DIR: From d1c6891ed4953c8bb0322f996a08a79d259f0950 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 28 Jun 2024 12:10:11 -0400 Subject: [PATCH 392/736] use vlm_contact_email in create and responses --- seqr/fixtures/1kg_project.json | 4 +++ seqr/models.py | 1 + seqr/views/apis/anvil_workspace_api.py | 1 + seqr/views/apis/anvil_workspace_api_tests.py | 29 ++++++++++++++------ seqr/views/apis/project_api_tests.py | 27 ++++++++++++++---- seqr/views/utils/test_utils.py | 2 +- 6 files changed, 49 insertions(+), 15 deletions(-) diff --git a/seqr/fixtures/1kg_project.json 
b/seqr/fixtures/1kg_project.json index bad151c995..6f83fda996 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -20,6 +20,7 @@ "has_case_review": true, "mme_primary_data_owner": "PI", "mme_contact_url": "mailto:test@broadinstitute.org,matchmaker@broadinstitute.org", + "vlm_contact_email": "test@broadinstitute.org,vlm@broadinstitute.org", "last_accessed_date": "2017-09-15T18:15:50.827Z" } }, @@ -41,6 +42,7 @@ "can_view_group": 3, "is_mme_enabled": false, "mme_primary_data_owner": "", + "vlm_contact_email": "vlm@broadinstitute.org", "last_accessed_date": "2017-09-15T18:15:50.827Z" } }, @@ -63,6 +65,7 @@ "is_demo": true, "mme_primary_data_owner": "", "mme_contact_url": "mailto:seqr-test@gmail.com,test@broadinstitute.org", + "vlm_contact_email": "seqr-test@gmail.com,test@broadinstitute.org", "last_accessed_date": "2017-09-15T18:15:50.827Z" } }, @@ -81,6 +84,7 @@ "last_accessed_date": "2017-09-15T18:15:50.827Z", "consent_code": "H", "genome_version": "38", + "vlm_contact_email": "vlm@broadinstitute.org", "workspace_name": "anvil-non-analyst-project 1000 Genomes Demo", "workspace_namespace": "ext-data" } diff --git a/seqr/models.py b/seqr/models.py index 770a37cce1..bda633b9b7 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -264,6 +264,7 @@ class Meta: 'name', 'description', 'created_date', 'last_modified_date', 'genome_version', 'mme_contact_institution', 'last_accessed_date', 'is_mme_enabled', 'mme_primary_data_owner', 'mme_contact_url', 'guid', 'consent_code', 'workspace_namespace', 'workspace_name', 'has_case_review', 'enable_hgmd', 'is_demo', 'all_user_demo', + 'vlm_contact_email', ] diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py index 281be65beb..9a0f006c14 100644 --- a/seqr/views/apis/anvil_workspace_api.py +++ b/seqr/views/apis/anvil_workspace_api.py @@ -184,6 +184,7 @@ def create_project_from_workspace(request, namespace, name): 'workspace_name': name, 'mme_primary_data_owner': 
request.user.get_full_name(), 'mme_contact_url': 'mailto:{}'.format(request.user.email), + 'vlm_contact_email': request.user.email, } project = create_model_from_json(Project, project_args, user=request.user) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index f24cc485d1..4f000b641b 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -559,14 +559,27 @@ def test_create_project_from_workspace(self): self.assertEqual(response.status_code, 200) project = Project.objects.get(workspace_namespace=TEST_WORKSPACE_NAMESPACE, workspace_name=TEST_NO_PROJECT_WORKSPACE_NAME) response_json = response.json() - self.assertEqual(project.guid, response_json['projectGuid']) - self.assertListEqual( - [project.genome_version, project.description, project.workspace_namespace, project.workspace_name], - ['38', 'A test project', TEST_WORKSPACE_NAMESPACE, TEST_NO_PROJECT_WORKSPACE_NAME]) - - self.assertListEqual( - [project.mme_contact_institution, project.mme_primary_data_owner, project.mme_contact_url], - ['Broad Center for Mendelian Genomics', 'Test Manager User', 'mailto:test_user_manager@test.com']) + self.assertDictEqual({k: getattr(project, k) for k in project._meta.json_fields}, { + 'guid': response_json['projectGuid'], + 'name': TEST_NO_PROJECT_WORKSPACE_NAME, + 'description': 'A test project', + 'workspace_namespace': TEST_WORKSPACE_NAMESPACE, + 'workspace_name': TEST_NO_PROJECT_WORKSPACE_NAME, + 'has_case_review': False, + 'enable_hgmd': False, + 'is_demo': False, + 'all_user_demo': False, + 'consent_code': None, + 'created_date': mock.ANY, + 'last_modified_date': mock.ANY, + 'last_accessed_date': mock.ANY, + 'genome_version': '38', + 'is_mme_enabled': True, + 'mme_contact_institution': 'Broad Center for Mendelian Genomics', + 'mme_primary_data_owner': 'Test Manager User', + 'mme_contact_url': 'mailto:test_user_manager@test.com', + 'vlm_contact_email': 
'test_user_manager@test.com', + }) self._assert_valid_operation(project, test_add_data=False) diff --git a/seqr/views/apis/project_api_tests.py b/seqr/views/apis/project_api_tests.py index 3f07dd8526..7c5c821542 100644 --- a/seqr/views/apis/project_api_tests.py +++ b/seqr/views/apis/project_api_tests.py @@ -73,15 +73,30 @@ def test_create_and_delete_project(self, mock_airtable_logger): # check that project was created new_project = Project.objects.get(name='new_project') - self.assertEqual(new_project.description, 'new project description') - self.assertEqual(new_project.genome_version, '38') - self.assertEqual(new_project.consent_code, 'H') - self.assertTrue(new_project.is_demo) - self.assertFalse(new_project.is_mme_enabled) self.assertEqual(new_project.created_by, self.pm_user) self.assertEqual(new_project.projectcategory_set.count(), 0) expected_workspace_name = self.CREATE_PROJECT_JSON.get('workspaceName') - self.assertEqual(new_project.workspace_name, expected_workspace_name) + self.assertDictEqual({k: getattr(new_project, k) for k in new_project._meta.json_fields}, { + 'guid': mock.ANY, + 'name': 'new_project', + 'description': 'new project description', + 'workspace_namespace': self.CREATE_PROJECT_JSON.get('workspaceNamespace'), + 'workspace_name': expected_workspace_name, + 'has_case_review': False, + 'enable_hgmd': False, + 'is_demo': True, + 'all_user_demo': False, + 'consent_code': 'H', + 'created_date': mock.ANY, + 'last_modified_date': mock.ANY, + 'last_accessed_date': mock.ANY, + 'genome_version': '38', + 'is_mme_enabled': False, + 'mme_contact_institution': 'Broad Center for Mendelian Genomics', + 'mme_primary_data_owner': 'Samantha Baxter', + 'mme_contact_url': 'mailto:matchmaker@broadinstitute.org', + 'vlm_contact_email': 'vlm@broadinstitute.org', + }) self._check_created_project_groups(new_project) project_guid = new_project.guid diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index f0ed234d4f..7ff2f6e935 100644 --- 
a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -738,7 +738,7 @@ def _get_list_param(call, param): 'projectGuid', 'projectCategoryGuids', 'canEdit', 'name', 'description', 'createdDate', 'lastModifiedDate', 'lastAccessedDate', 'mmeContactUrl', 'genomeVersion', 'mmePrimaryDataOwner', 'mmeContactInstitution', 'isMmeEnabled', 'workspaceName', 'workspaceNamespace', 'hasCaseReview', 'enableHgmd', 'isDemo', 'allUserDemo', - 'userIsCreator', 'consentCode', 'isAnalystProject', + 'userIsCreator', 'consentCode', 'isAnalystProject', 'vlmContactEmail', } ANALYSIS_GROUP_FIELDS = {'analysisGroupGuid', 'description', 'name', 'projectGuid', 'familyGuids'} From 0c6a3d1b43791f058e90322dac5102a6211191c9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 28 Jun 2024 13:20:14 -0400 Subject: [PATCH 393/736] edit vlm contact --- .../components/buttons/EditProjectButton.jsx | 45 ++++++++++++++++++- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/ui/shared/components/buttons/EditProjectButton.jsx b/ui/shared/components/buttons/EditProjectButton.jsx index f1ca2212b4..3c3bedee63 100644 --- a/ui/shared/components/buttons/EditProjectButton.jsx +++ b/ui/shared/components/buttons/EditProjectButton.jsx @@ -3,6 +3,8 @@ import { connect } from 'react-redux' import PropTypes from 'prop-types' import { updateProject } from 'redux/rootReducer' +import { BaseSemanticInput } from '../form/Inputs' +import { validators } from '../form/FormHelpers' import UpdateButton from './UpdateButton' import { EDITABLE_PROJECT_FIELDS, @@ -11,10 +13,49 @@ import { MATCHMAKER_CONTACT_URL_FIELD, } from '../../utils/constants' -const MATCHMAKER_PROJECT_FIELDS = [ +const setBoolVal = onChange => data => onChange(data.checked ?
null : 'vlm@broadinstitute.org') + +const VlmContactInput = ({ value, onChange, ...props }) => ([ + , + , +]) + +VlmContactInput.propTypes = { + value: PropTypes.string, + onChange: PropTypes.func, +} + +const VLM_CONTACT_FIELD = { + name: 'vlmContactEmail', + parse: val => val || null, + format: val => val || '', + validate: value => (!value ? undefined : validators.requiredEmail(value)), + component: VlmContactInput, +} + +const MATCHMAKER_PROJECT_FIELDS = [VLM_CONTACT_FIELD, ...[ { ...MATCHMAKER_CONTACT_NAME_FIELD, name: 'mmePrimaryDataOwner' }, { ...MATCHMAKER_CONTACT_URL_FIELD, name: 'mmeContactUrl' }, -].map(({ label, ...field }) => ({ ...field, label: `Matchmaker ${label}` })) +].map(({ label, ...field }) => ({ ...field, label: `Matchmaker ${label}` }))] // Field mapping based on whether project has matchmaker and user is a PM. Usage: FIELD_LOOKUP[isMmeEnabled][isPm] const FIELD_LOOKUP = { From 1433dedf01dde2e4f88b9a2a4356f121a72aa2b7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 1 Jul 2024 10:58:01 -0400 Subject: [PATCH 394/736] actually test gregor finding row with notes --- seqr/views/apis/individual_api_tests.py | 14 +++++++------- seqr/views/apis/report_api_tests.py | 10 +++++++--- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py index 079275b04a..72f72df621 100644 --- a/seqr/views/apis/individual_api_tests.py +++ b/seqr/views/apis/individual_api_tests.py @@ -994,7 +994,7 @@ def _set_metadata_file_iter(self, mock_subprocess, genetic_findings_table): @mock.patch('seqr.utils.file_utils.subprocess.Popen') def test_import_gregor_metadata(self, mock_subprocess): genetic_findings_table = deepcopy(GENETIC_FINDINGS_TABLE) - genetic_findings_table[2] = genetic_findings_table[2][:11] + genetic_findings_table[3][11:14] + \ + genetic_findings_table[2] = genetic_findings_table[2][:11] + genetic_findings_table[4][11:14] + \ genetic_findings_table[2][14:] 
self._set_metadata_file_iter(mock_subprocess, genetic_findings_table) @@ -1021,7 +1021,7 @@ def test_import_gregor_metadata(self, mock_subprocess): 'Created 1 new families, 3 new individuals', 'Updated 1 existing families, 1 existing individuals', 'Skipped 0 unchanged individuals', - 'Loaded 3 new and 0 updated findings tags', + 'Loaded 4 new and 0 updated findings tags', ], }}) @@ -1036,7 +1036,7 @@ def test_import_gregor_metadata(self, mock_subprocess): 'metadataTitle': None, 'color': '#c25fc4', 'order': 0.5, - 'numTags': 4, + 'numTags': 5, }) self.assertEqual(len(response_json['familiesByGuid']), 2) @@ -1047,7 +1047,7 @@ def test_import_gregor_metadata(self, mock_subprocess): self.assertDictEqual(response_json['familyTagTypeCounts'], { 'F000012_12': {'GREGoR Finding': 3, 'MME Submission': 2, 'Tier 1 - Novel gene and phenotype': 1}, - new_family_guid: {'GREGoR Finding': 1}, + new_family_guid: {'GREGoR Finding': 2}, }) self.assertEqual(len(response_json['individualsByGuid']), 4) @@ -1126,7 +1126,7 @@ def test_import_gregor_metadata(self, mock_subprocess): 'saved_variant_json__transcripts', 'saved_variant_json__genotypes', 'saved_variant_json__mainTranscriptId', 'saved_variant_json__hgvsc', ) - self.assertEqual(len(saved_variants), 3) + self.assertEqual(len(saved_variants), 4) self.assertDictEqual(saved_variants[0], { 'guid': 'SV0000006_1248367227_r0003_tes', 'variant_id': '1-248367227-TC-T', @@ -1220,12 +1220,12 @@ def test_import_gregor_metadata(self, mock_subprocess): 'Created 0 new families, 0 new individuals', 'Updated 0 existing families, 0 existing individuals', 'Skipped 4 unchanged individuals', - 'Loaded 1 new and 2 updated findings tags', + 'Loaded 1 new and 3 updated findings tags', ], }}) self.assertDictEqual(response_json['individualsByGuid'], {}) - no_gene_saved_variant_json = SavedVariant.objects.get(family__guid=new_family_guid).saved_variant_json + no_gene_saved_variant_json = SavedVariant.objects.get(family__guid=new_family_guid, 
variant_id='1-248367227-TC-T').saved_variant_json self.assertDictEqual(no_gene_saved_variant_json['transcripts'], {}) self.assertDictEqual(no_gene_saved_variant_json['genotypes'], new_family_genotypes) self.assertNotIn('mainTranscriptId', no_gene_saved_variant_json) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index ad185e4d6c..a304da2407 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -608,6 +608,10 @@ 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', 'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', + ], [ + 'Broad_HG00731_19_1912634', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh38', '19', + '1912634', 'C', 'T', 'CA403171634', 'OR4G11P', 'ENST00000371839', '', '', 'Heterozygous', '', 'unknown', + 'Broad_HG00731_19_1912633', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', ], [ 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown', @@ -1100,9 +1104,9 @@ def _assert_expected_gregor_files(self, mock_open, mock_subprocess, has_second_p self._assert_expected_file( genetic_findings_file, - expected_rows=GENETIC_FINDINGS_TABLE if has_second_project else GENETIC_FINDINGS_TABLE[:3], - absent_rows=None if has_second_project else EXPERIMENT_LOOKUP_TABLE[3:], - additional_calls=3, + expected_rows=GENETIC_FINDINGS_TABLE if has_second_project else GENETIC_FINDINGS_TABLE[:4], + absent_rows=None, + additional_calls=2, ) def _assert_expected_file(self, actual_rows, expected_rows, additional_calls=0, absent_rows=None): From b0678e866c60fda53d14ca0505f4a12b1d277690 Mon Sep 17 
00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 12:07:35 -0400 Subject: [PATCH 395/736] static files --- requirements-dev.in | 10 ++-- requirements-dev.txt | 12 ++--- requirements.in | 4 +- requirements.txt | 6 ++- .../static}/fonts/icon-overrides.eot | Bin .../static}/fonts/icon-overrides.svg | 0 .../static}/fonts/icon-overrides.ttf | Bin .../static}/fonts/icon-overrides.woff | Bin .../static}/images/landing_page_icon1.png | Bin .../static}/images/landing_page_icon2.png | Bin .../static}/images/landing_page_icon3.png | Bin .../static}/images/table_excel.png | Bin {static => seqr/static}/images/table_tsv.png | Bin settings.py | 48 +++++++++--------- 14 files changed, 37 insertions(+), 43 deletions(-) rename {static => seqr/static}/fonts/icon-overrides.eot (100%) rename {static => seqr/static}/fonts/icon-overrides.svg (100%) rename {static => seqr/static}/fonts/icon-overrides.ttf (100%) rename {static => seqr/static}/fonts/icon-overrides.woff (100%) rename {static => seqr/static}/images/landing_page_icon1.png (100%) rename {static => seqr/static}/images/landing_page_icon2.png (100%) rename {static => seqr/static}/images/landing_page_icon3.png (100%) rename {static => seqr/static}/images/table_excel.png (100%) rename {static => seqr/static}/images/table_tsv.png (100%) diff --git a/requirements-dev.in b/requirements-dev.in index ff056bf0d8..7d689c6013 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -1,8 +1,8 @@ -c requirements.txt # use the generated reqs as a constraint coverage<5.2 django-compressor -django-debug-toolbar<3.3 # https://github.com/jazzband/django-debug-toolbar -mock # mock objects for unit tests -pip-tools # tool for managing our python dependency tree -responses # mock HTTP responses for unit tests -urllib3-mock # mock urllib3 for tests +django-debug-toolbar==4.4.2 # https://github.com/jazzband/django-debug-toolbar +mock # mock objects for unit tests +pip-tools # tool for managing our python dependency tree +responses # mock 
HTTP responses for unit tests +urllib3-mock # mock urllib3 for tests diff --git a/requirements-dev.txt b/requirements-dev.txt index 38ec6ac2fc..a1399355e1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -22,7 +22,7 @@ click==8.1.3 # via pip-tools coverage==5.1 # via -r requirements-dev.in -django==3.2.25 +django==4.2.13 # via # -c requirements.txt # django-appconf @@ -31,7 +31,7 @@ django-appconf==1.0.5 # via django-compressor django-compressor==4.3.1 # via -r requirements-dev.in -django-debug-toolbar==3.2.4 +django-debug-toolbar==4.4.2 # via -r requirements-dev.in idna==3.7 # via @@ -47,10 +47,6 @@ pip-tools==6.12.2 # via -r requirements-dev.in pyproject-hooks==1.0.0 # via build -pytz==2022.7.1 - # via - # -c requirements.txt - # django rcssmin==1.1.1 # via django-compressor requests==2.32.2 @@ -69,9 +65,7 @@ sqlparse==0.5.0 toml==0.10.2 # via responses tomli==2.0.1 - # via - # build - # pyproject-hooks + # via build types-toml==0.10.8.5 # via responses urllib3==1.26.19 diff --git a/requirements.in b/requirements.in index 4a76de1a1f..602875d970 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,4 @@ -Django==4.2 # core server-side framework +Django==4.2.13 # core server-side framework django-anymail # for sending emails using cloud-based mail service providers django-csp # for setting CSP headers django-guardian # object-level permissions for database records. 
Behind a major version due to missing Python 2 support @@ -14,7 +14,7 @@ gunicorn # web server jmespath openpyxl # library for reading/writing Excel files pillow # required dependency of Djagno ImageField-type database records -psycopg2 # postgres database access +psycopg # postgres database access pyliftover # GRCh37/GRCh38 liftover requests # simpler way to make http requests redis<4.6 # client lib for the redis in-memory database - used for caching server-side objects diff --git a/requirements.txt b/requirements.txt index 83322e1a7a..f099f1b249 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ defusedxml==0.7.1 # via # python3-openid # social-auth-core -django==4.2 +django==4.2.13 # via # -r requirements.in # django-anymail @@ -107,7 +107,7 @@ protobuf==3.20.2 # via # google-api-core # googleapis-common-protos -psycopg2==2.9.5 +psycopg==3.2.1 # via -r requirements.in pyasn1==0.4.8 # via @@ -173,6 +173,8 @@ tenacity==8.3.0 # via -r requirements.in tqdm==4.66.3 # via -r requirements.in +typing-extensions==4.12.2 + # via psycopg urllib3==1.26.19 # via # elasticsearch diff --git a/static/fonts/icon-overrides.eot b/seqr/static/fonts/icon-overrides.eot similarity index 100% rename from static/fonts/icon-overrides.eot rename to seqr/static/fonts/icon-overrides.eot diff --git a/static/fonts/icon-overrides.svg b/seqr/static/fonts/icon-overrides.svg similarity index 100% rename from static/fonts/icon-overrides.svg rename to seqr/static/fonts/icon-overrides.svg diff --git a/static/fonts/icon-overrides.ttf b/seqr/static/fonts/icon-overrides.ttf similarity index 100% rename from static/fonts/icon-overrides.ttf rename to seqr/static/fonts/icon-overrides.ttf diff --git a/static/fonts/icon-overrides.woff b/seqr/static/fonts/icon-overrides.woff similarity index 100% rename from static/fonts/icon-overrides.woff rename to seqr/static/fonts/icon-overrides.woff diff --git a/static/images/landing_page_icon1.png b/seqr/static/images/landing_page_icon1.png 
similarity index 100% rename from static/images/landing_page_icon1.png rename to seqr/static/images/landing_page_icon1.png diff --git a/static/images/landing_page_icon2.png b/seqr/static/images/landing_page_icon2.png similarity index 100% rename from static/images/landing_page_icon2.png rename to seqr/static/images/landing_page_icon2.png diff --git a/static/images/landing_page_icon3.png b/seqr/static/images/landing_page_icon3.png similarity index 100% rename from static/images/landing_page_icon3.png rename to seqr/static/images/landing_page_icon3.png diff --git a/static/images/table_excel.png b/seqr/static/images/table_excel.png similarity index 100% rename from static/images/table_excel.png rename to seqr/static/images/table_excel.png diff --git a/static/images/table_tsv.png b/seqr/static/images/table_tsv.png similarity index 100% rename from static/images/table_tsv.png rename to seqr/static/images/table_tsv.png diff --git a/settings.py b/settings.py index 5598d9dadd..2080bf7a8d 100644 --- a/settings.py +++ b/settings.py @@ -2,7 +2,7 @@ import os import random import string -import subprocess # nosec +import subprocess # nosec from ssl import create_default_context @@ -16,7 +16,7 @@ # Django settings ######################################################### -# Password validation - https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators +# Password validation - https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', @@ -71,7 +71,7 @@ CSRF_COOKIE_NAME = 'csrf_token' CSRF_COOKIE_HTTPONLY = False -SESSION_COOKIE_AGE = 86400 # seconds in 1 day +SESSION_COOKIE_AGE = 86400 # seconds in 1 day X_FRAME_OPTIONS = 'SAMEORIGIN' SECURE_BROWSER_XSS_FILTER = True @@ -127,10 +127,28 @@ USE_L10N = True USE_TZ = True +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/4.2/howto/static-files/ 
+STATIC_URL = '/static/' +STATICFILES_DIRS = ['ui/dist'] +STATIC_ROOT = os.path.join(BASE_DIR, 'static') +STATICFILES_FINDERS = ( + 'django.contrib.staticfiles.finders.FileSystemFinder', + 'django.contrib.staticfiles.finders.AppDirectoriesFinder', +) +STORAGES = { + "staticfiles": { + "BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage", + }, +} + # If specified, store data in the named GCS bucket and use the gcloud storage backend. # Else, fall back to a path on the local filesystem. GCS_MEDIA_ROOT_BUCKET = os.environ.get('GCS_MEDIA_ROOT_BUCKET') if GCS_MEDIA_ROOT_BUCKET: + STORAGES['default'] = { + "BACKEND": "storages.backends.gcloud.GoogleCloudStorage", + } DEFAULT_FILE_STORAGE = 'storages.backends.gcloud.GoogleCloudStorage' GS_BUCKET_NAME = GCS_MEDIA_ROOT_BUCKET GS_DEFAULT_ACL = 'publicRead' @@ -209,7 +227,7 @@ LOGOUT_URL = '/logout' POSTGRES_DB_CONFIG = { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', + 'ENGINE': 'django.db.backends.postgresql', 'HOST': os.environ.get('POSTGRES_SERVICE_HOSTNAME', 'localhost'), 'PORT': int(os.environ.get('POSTGRES_SERVICE_PORT', '5432')), 'USER': os.environ.get('POSTGRES_USERNAME', 'postgres'), @@ -252,10 +270,6 @@ 'http://localhost:3000', 'http://localhost:8000', ) - # TODO: ? 
- # the collectstatic step in docker build runs without env variables set, and uncommenting these lines breaks the docker build - # STATICFILES_DIRS.append(STATIC_ROOT) - # STATIC_ROOT = None CORS_ALLOW_CREDENTIALS = True CORS_REPLACE_HTTPS_REFERER = True # django-hijack plugin @@ -264,22 +278,6 @@ HIJACK_LOGIN_REDIRECT_URL = '/' TEMPLATE_DIRS.append('ui') - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/4.2/howto/static-files/ -STATIC_URL = '/static/' -STATICFILES_DIRS = ['ui/dist'] -if DEBUG: - STATICFILES_DIRS.append(os.path.join(BASE_DIR, 'static')) -else: - STATIC_ROOT = os.path.join(BASE_DIR, 'static') - -STATICFILES_FINDERS = ( - 'django.contrib.staticfiles.finders.FileSystemFinder', - 'django.contrib.staticfiles.finders.AppDirectoriesFinder', -) - - TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', @@ -289,7 +287,7 @@ 'context_processors': [ 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', # required for admin template - 'django.template.context_processors.request', # must be enabled in DjangoTemplates (TEMPLATES) in order to use the admin navigation sidebar + 'django.template.context_processors.request', # must be enabled in DjangoTemplates (TEMPLATES) in order to use the admin navigation sidebar 'social_django.context_processors.backends', # required for social_auth, same for below 'social_django.context_processors.login_redirect', ], From 26e0ff3eb5522ea059dbdd975f7ec15e3379b85d Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 12:27:09 -0400 Subject: [PATCH 396/736] outputfield --- seqr/migrations/0024_varianttag_metadata.py | 7 ++++++- settings.py | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/seqr/migrations/0024_varianttag_metadata.py b/seqr/migrations/0024_varianttag_metadata.py index 8031a4c29f..e522320cd4 100644 --- a/seqr/migrations/0024_varianttag_metadata.py +++ 
b/seqr/migrations/0024_varianttag_metadata.py @@ -2,6 +2,7 @@ from collections import defaultdict from django.contrib.postgres.aggregates import StringAgg from django.db import migrations, models +from django.db.models import TextField from django.db.models.functions import Concat from django.utils import timezone from seqr.utils.logging_utils import log_model_update, log_model_bulk_update, SeqrLogger @@ -120,7 +121,11 @@ def merge_duplicate_tags(apps, schema_editor): db_alias = schema_editor.connection.alias updated_tags = VariantTag.objects.using(db_alias).filter(variant_tag_type__name__in=SANGER_TAGS.values()).annotate( - group_id=Concat('variant_tag_type__guid', StringAgg('saved_variants__guid', ',', ordering='saved_variants__guid'))) + group_id=Concat( + 'variant_tag_type__guid', + StringAgg('saved_variants__guid', ',', ordering='saved_variants__guid'), + output_field=TextField() + )) if not updated_tags: logger.info('No updated tags found, skipping validation tag merging', user=None) return diff --git a/settings.py b/settings.py index 2080bf7a8d..a29165845c 100644 --- a/settings.py +++ b/settings.py @@ -149,7 +149,6 @@ STORAGES['default'] = { "BACKEND": "storages.backends.gcloud.GoogleCloudStorage", } - DEFAULT_FILE_STORAGE = 'storages.backends.gcloud.GoogleCloudStorage' GS_BUCKET_NAME = GCS_MEDIA_ROOT_BUCKET GS_DEFAULT_ACL = 'publicRead' MEDIA_ROOT = False From ba90486ffeda07bb3fab94d3629114093f460d42 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 12:48:00 -0400 Subject: [PATCH 397/736] fix terra api tests --- seqr/views/utils/terra_api_utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/seqr/views/utils/terra_api_utils.py b/seqr/views/utils/terra_api_utils.py index 4a81f15c18..dbda82aee8 100644 --- a/seqr/views/utils/terra_api_utils.py +++ b/seqr/views/utils/terra_api_utils.py @@ -73,8 +73,10 @@ def is_google_authenticated(user): def remove_token(user): social = _safe_get_social(user) if social and 
social.extra_data: - social.extra_data.pop('access_token', None) - social.extra_data['expires'] = 0 + extra_data = json.loads(social.extra_data) + extra_data.pop('access_token', None) + extra_data['expires'] = 0 + social.extra_data = extra_data social.save() @@ -84,7 +86,7 @@ def is_anvil_authenticated(user): social = _safe_get_social(user) if social and social.extra_data: - return social.extra_data.get('access_token', '') != '' + return json.loads(social.extra_data).get('access_token', '') != '' return False @@ -109,7 +111,8 @@ def _safe_get_social(user): def _get_social_access_token(user): social = _safe_get_social(user) - if (social.extra_data['auth_time'] + social.extra_data['expires'] - 10) <= int( + extra_data = json.loads(social.extra_data) + if (extra_data['auth_time'] + extra_data['expires'] - 10) <= int( time.time()): # token expired or expiring? strategy = load_strategy() logger.info('Refreshing access token', user) @@ -118,7 +121,7 @@ def _get_social_access_token(user): except Exception as ee: logger.warning('Refresh token failed. {}'.format(str(ee)), user) raise TerraRefreshTokenFailedException('Refresh token failed. {}'.format(str(ee))) - return social.extra_data['access_token'] + return extra_data['access_token'] def _get_service_account_access_token(): From 288db1293e537cb8ca587439d33bbf35fcd6bd41 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 12:53:04 -0400 Subject: [PATCH 398/736] fix superuser tests --- settings.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/settings.py b/settings.py index a29165845c..a3e7e502f5 100644 --- a/settings.py +++ b/settings.py @@ -136,18 +136,15 @@ 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', ) -STORAGES = { - "staticfiles": { - "BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage", - }, -} # If specified, store data in the named GCS bucket and use the gcloud storage backend. 
# Else, fall back to a path on the local filesystem. GCS_MEDIA_ROOT_BUCKET = os.environ.get('GCS_MEDIA_ROOT_BUCKET') if GCS_MEDIA_ROOT_BUCKET: - STORAGES['default'] = { - "BACKEND": "storages.backends.gcloud.GoogleCloudStorage", + STORAGES = { + "default": { + "BACKEND": "storages.backends.gcloud.GoogleCloudStorage", + } } GS_BUCKET_NAME = GCS_MEDIA_ROOT_BUCKET GS_DEFAULT_ACL = 'publicRead' From 6bbf0b89dc8efbf3593d79d2cb26f64d4de1ee80 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 13:28:08 -0400 Subject: [PATCH 399/736] fix social a better way --- seqr/views/utils/terra_api_utils.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/seqr/views/utils/terra_api_utils.py b/seqr/views/utils/terra_api_utils.py index dbda82aee8..46d7ab80e7 100644 --- a/seqr/views/utils/terra_api_utils.py +++ b/seqr/views/utils/terra_api_utils.py @@ -73,10 +73,8 @@ def is_google_authenticated(user): def remove_token(user): social = _safe_get_social(user) if social and social.extra_data: - extra_data = json.loads(social.extra_data) - extra_data.pop('access_token', None) - extra_data['expires'] = 0 - social.extra_data = extra_data + social.extra_data.pop('access_token', None) + social.extra_data['expires'] = 0 social.save() @@ -86,7 +84,7 @@ def is_anvil_authenticated(user): social = _safe_get_social(user) if social and social.extra_data: - return json.loads(social.extra_data).get('access_token', '') != '' + return social.extra_data.get('access_token', '') != '' return False @@ -105,14 +103,19 @@ def _safe_get_social(user): if not google_auth_enabled() or not hasattr(user, 'social_auth'): return None - social = user.social_auth.filter(provider=SOCIAL_AUTH_PROVIDER) - return social.first() if social else None + social_auth = user.social_auth.filter(provider=SOCIAL_AUTH_PROVIDER) + if not social_auth: + return None + + social = social_auth.first() + if type(social.extra_data) is str: # JSONField extra_data is returned as a string 
+ social.extra_data = json.loads(social.extra_data) + return social def _get_social_access_token(user): social = _safe_get_social(user) - extra_data = json.loads(social.extra_data) - if (extra_data['auth_time'] + extra_data['expires'] - 10) <= int( + if (social.extra_data['auth_time'] + social.extra_data['expires'] - 10) <= int( time.time()): # token expired or expiring? strategy = load_strategy() logger.info('Refreshing access token', user) @@ -121,7 +124,7 @@ def _get_social_access_token(user): except Exception as ee: logger.warning('Refresh token failed. {}'.format(str(ee)), user) raise TerraRefreshTokenFailedException('Refresh token failed. {}'.format(str(ee))) - return extra_data['access_token'] + return social.extra_data['access_token'] def _get_service_account_access_token(): From 6891bf4f542ea04aa27ae01b5d0a6b91fdcde39f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 1 Jul 2024 13:59:21 -0400 Subject: [PATCH 400/736] include notes in gregor export --- seqr/views/apis/report_api.py | 3 +- seqr/views/apis/report_api_tests.py | 9 +++-- seqr/views/utils/anvil_metadata_utils.py | 50 +++++++++++++----------- 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 6a6948a0fb..12382aae14 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -400,6 +400,7 @@ def _add_row(row, family_id, row_type): format_id=_format_gregor_id, get_additional_individual_fields=_get_participant_row, post_process_variant=_post_process_gregor_variant, + include_parent_mnvs=True, # TODO flag needed? 
include_svs=False, airtable_fields=[[PARTICIPANT_ID_FIELD, 'Recontactable'], [SMID_FIELD]], include_mondo=True, @@ -576,7 +577,7 @@ def _get_phenotype_row(feature): } -def _post_process_gregor_variant(row, gene_variants, **kwargs): +def _post_process_gregor_variant(row, gene_variants): return {'linked_variant': next( v['genetic_findings_id'] for v in gene_variants if v['genetic_findings_id'] != row['genetic_findings_id'] ) if len(gene_variants) > 1 else None} diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index a304da2407..c91bb4e3cf 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -611,7 +611,9 @@ ], [ 'Broad_HG00731_19_1912634', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh38', '19', '1912634', 'C', 'T', 'CA403171634', 'OR4G11P', 'ENST00000371839', '', '', 'Heterozygous', '', 'unknown', - 'Broad_HG00731_19_1912633', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', + 'Broad_HG00731_19_1912633', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES', + 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', + '', '', '', '', '', '', ], [ 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown', @@ -912,8 +914,9 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat ], [ 'Broad_HG00731_19_1912634', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh38', '19', '1912634', 'C', 'T', 'CA403171634', 'OR4G11P', 'ENST00000371839', '', '', 'Heterozygous', '', 'unknown', - 'Broad_HG00731_19_1912633', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES', '', '', '', '', - '', '', '', + 'Broad_HG00731_19_1912633', '', 'Known', '', 
'MONDO:0044970', '', 'Full', '', '', 'SR-ES', + 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', + '', '', '', '', '', '', ]], additional_calls=2) responses.calls.reset() diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 77f0f3b4dc..d71bbb9cb0 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -305,23 +305,17 @@ def _get_genotype_zygosity(genotype): return None -def _post_process_variant_metadata(v, gene_variants, include_parent_mnvs=False): - discovery_notes = None - if len(gene_variants) > 2: - parent_mnv = next((v for v in gene_variants if len(v['individual_genotype']) == 1), gene_variants[0]) - if parent_mnv['genetic_findings_id'] == v['genetic_findings_id'] and not include_parent_mnvs: - return None - variant_type = 'complex structural' if parent_mnv.get('svType') else 'multinucleotide' - parent_name = _get_nested_variant_name(parent_mnv) - parent_details = [parent_mnv[key] for key in ['hgvsc', 'hgvsp'] if parent_mnv.get(key)] - parent = f'{parent_name} ({", ".join(parent_details)})' if parent_details else parent_name - mnv_names = [_get_nested_variant_name(v) for v in gene_variants] - nested_mnvs = sorted([v for v in mnv_names if v != parent_name]) - discovery_notes = f'The following variants are part of the {variant_type} variant {parent}: {", ".join(nested_mnvs)}' - return { - 'sv_name': _get_sv_name(v), - 'notes': discovery_notes, - } +def _get_discovery_notes(v, gene_variants, include_parent_mnvs): + parent_mnv = next((v for v in gene_variants if len(v['individual_genotype']) == 1), gene_variants[0]) + if parent_mnv['genetic_findings_id'] == v['genetic_findings_id'] and not include_parent_mnvs: + return None + variant_type = 'complex structural' if parent_mnv.get('svType') else 'multinucleotide' + parent_name = _get_nested_variant_name(parent_mnv) + 
parent_details = [parent_mnv[key] for key in ['hgvsc', 'hgvsp'] if parent_mnv.get(key)] + parent = f'{parent_name} ({", ".join(parent_details)})' if parent_details else parent_name + mnv_names = [_get_nested_variant_name(v) for v in gene_variants] + nested_mnvs = sorted([v for v in mnv_names if v != parent_name]) + return f'The following variants are part of the {variant_type} variant {parent}: {", ".join(nested_mnvs)}' def _get_parsed_saved_discovery_variants_by_family( @@ -376,6 +370,10 @@ def _get_parsed_saved_discovery_variants_by_family( parsed_variant.update({ 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), }) + if include_svs: + parsed_variant.update({ + 'sv_name': _get_sv_name(parsed_variant), + }) variants.append(parsed_variant) genes_by_id = get_genes(gene_ids) @@ -510,12 +508,18 @@ def _get_genetic_findings_rows(rows: list[dict], individual: Individual, partici to_remove = [] for row in parsed_rows: del row['genotypes'] - process_func = post_process_variant or _post_process_variant_metadata - update = process_func(row, variants_by_gene[row[GENE_COLUMN]], include_parent_mnvs=include_parent_mnvs) - if update: - row.update(update) - else: - to_remove.append(row) + + gene_variants = variants_by_gene[row[GENE_COLUMN]] + discovery_notes = None + if len(gene_variants) > 2: + discovery_notes = _get_discovery_notes(row, gene_variants, include_parent_mnvs) + if discovery_notes is None: + to_remove.append(row) + continue + row['notes'] = discovery_notes + + if post_process_variant: + row.update(post_process_variant(row, gene_variants)) return [row for row in parsed_rows if row not in to_remove] From 2a00ffc93b9678adfae838b927a80975bbb2de14 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 14:00:39 -0400 Subject: [PATCH 401/736] use isinstance --- seqr/views/utils/terra_api_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/utils/terra_api_utils.py b/seqr/views/utils/terra_api_utils.py index 
46d7ab80e7..314bbc4fec 100644 --- a/seqr/views/utils/terra_api_utils.py +++ b/seqr/views/utils/terra_api_utils.py @@ -108,7 +108,7 @@ def _safe_get_social(user): return None social = social_auth.first() - if type(social.extra_data) is str: # JSONField extra_data is returned as a string + if isinstance(social.extra_data, str): # JSONField extra_data is returned as a string social.extra_data = json.loads(social.extra_data) return social From 38a58dc82a5723f590359c3ace4c0530793c5287 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Mon, 1 Jul 2024 14:01:59 -0400 Subject: [PATCH 402/736] Support only a single callset path (#4192) --- seqr/views/apis/anvil_workspace_api_tests.py | 4 ++-- seqr/views/apis/data_manager_api_tests.py | 4 +--- seqr/views/utils/airflow_utils.py | 2 +- seqr/views/utils/test_utils.py | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index f24cc485d1..b29c40183f 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -741,7 +741,7 @@ def _assert_valid_operation(self, project, test_add_data=True): dag_json = { 'projects_to_run': [project.guid], - 'callset_paths': ['gs://test_bucket/test_path.vcf'], + 'callset_path': 'gs://test_bucket/test_path.vcf', 'sample_source': 'AnVIL', 'sample_type': 'WES', 'reference_genome': genome_version, @@ -824,7 +824,7 @@ def _test_mv_file_and_triggering_dag_exception(self, url, workspace, sample_data dag_id=self.DAG_NAME, dag=json.dumps({ 'projects_to_run': [project.guid], - 'callset_paths': ['gs://test_bucket/test_path.vcf'], + 'callset_path': 'gs://test_bucket/test_path.vcf', 'sample_source': 'AnVIL', 'sample_type': 'WES', 'reference_genome': genome_version, diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 881edc5dd7..f2d53ef5c3 100644 --- a/seqr/views/apis/data_manager_api_tests.py 
+++ b/seqr/views/apis/data_manager_api_tests.py @@ -1588,9 +1588,7 @@ def test_load_data(self, mock_subprocess, mock_temp_dir, mock_open): "R0001_1kg", "R0004_non_analyst_project" ], - "callset_paths": [ - "gs://test_bucket/mito_callset.mt" - ], + "callset_path": "gs://test_bucket/mito_callset.mt", "sample_source": "Broad_Internal", "sample_type": "WGS", "reference_genome": "GRCh38" diff --git a/seqr/views/utils/airflow_utils.py b/seqr/views/utils/airflow_utils.py index 1e7c0c11b3..7f27cbf4d5 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -34,7 +34,7 @@ def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type project_guids = sorted([p.guid for p in projects]) updated_variables = { 'projects_to_run': project_guids, - 'callset_paths': [data_path], + 'callset_path': data_path, 'sample_source': 'Broad_Internal' if is_internal else 'AnVIL', 'sample_type': sample_type, 'reference_genome': GENOME_VERSION_LOOKUP[genome_version], diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index f0ed234d4f..93ab1f312d 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -659,7 +659,7 @@ def assert_airflow_calls(self, trigger_error=False, additional_tasks_check=False dag_variable_overrides = self._get_dag_variable_overrides(additional_tasks_check) dag_variables = { 'projects_to_run': [dag_variable_overrides['project']] if 'project' in dag_variable_overrides else self.PROJECTS, - 'callset_paths': [f'gs://test_bucket/{dag_variable_overrides["callset_path"]}'], + 'callset_path': f'gs://test_bucket/{dag_variable_overrides["callset_path"]}', 'sample_source': dag_variable_overrides['sample_source'], 'sample_type': dag_variable_overrides['sample_type'], 'reference_genome': dag_variable_overrides.get('reference_genome', 'GRCh38'), From 735c70398c081fd1165d5ac50bacbeca636a6ffd Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 14:09:15 -0400 Subject: 
[PATCH 403/736] try setting --- seqr/views/utils/terra_api_utils.py | 10 ++-------- settings.py | 1 + 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/seqr/views/utils/terra_api_utils.py b/seqr/views/utils/terra_api_utils.py index 314bbc4fec..4a81f15c18 100644 --- a/seqr/views/utils/terra_api_utils.py +++ b/seqr/views/utils/terra_api_utils.py @@ -103,14 +103,8 @@ def _safe_get_social(user): if not google_auth_enabled() or not hasattr(user, 'social_auth'): return None - social_auth = user.social_auth.filter(provider=SOCIAL_AUTH_PROVIDER) - if not social_auth: - return None - - social = social_auth.first() - if isinstance(social.extra_data, str): # JSONField extra_data is returned as a string - social.extra_data = json.loads(social.extra_data) - return social + social = user.social_auth.filter(provider=SOCIAL_AUTH_PROVIDER) + return social.first() if social else None def _get_social_access_token(user): diff --git a/settings.py b/settings.py index a3e7e502f5..9c3a07e613 100644 --- a/settings.py +++ b/settings.py @@ -376,6 +376,7 @@ ######################################################### # Social auth specific settings ######################################################### +SOCIAL_AUTH_JSONFIELD_ENABLED = True SOCIAL_AUTH_GOOGLE_OAUTH2_IGNORE_DEFAULT_SCOPE = True SOCIAL_AUTH_GOOGLE_OAUTH2_SCOPE = [ 'https://www.googleapis.com/auth/userinfo.profile', From edaaf47a3b25be1f43be2b78210dd9b9dd178b46 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 1 Jul 2024 14:11:38 -0400 Subject: [PATCH 404/736] default include parent mnvs --- seqr/views/apis/report_api.py | 4 +--- seqr/views/utils/anvil_metadata_utils.py | 12 ++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 12382aae14..fbc185b835 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -160,7 +160,7 @@ def _add_row(row, family_id, row_type): max_loaded_date = 
request.GET.get('loadedBefore') or (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d') parse_anvil_metadata( - [project], request.user, _add_row, max_loaded_date=max_loaded_date, include_discovery_sample_id=True, + [project], request.user, _add_row, max_loaded_date=max_loaded_date, include_discovery_sample_id=True, omit_parent_mnvs=True, get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, *args: { 'congenital_status': Individual.ONSET_AGE_LOOKUP[individual.onset_age] if individual.onset_age else 'Unknown', **anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission), @@ -400,7 +400,6 @@ def _add_row(row, family_id, row_type): format_id=_format_gregor_id, get_additional_individual_fields=_get_participant_row, post_process_variant=_post_process_gregor_variant, - include_parent_mnvs=True, # TODO flag needed? include_svs=False, airtable_fields=[[PARTICIPANT_ID_FIELD, 'Recontactable'], [SMID_FIELD]], include_mondo=True, @@ -951,7 +950,6 @@ def _add_row(row, family_id, row_type): include_mondo=True, omit_airtable=True, proband_only_variants=True, - include_parent_mnvs=True, ) return create_json_response({'rows': variant_rows}) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index d71bbb9cb0..2bb27b54f4 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -165,7 +165,7 @@ def parse_anvil_metadata( airtable_fields: Iterable[str] = None, mme_value: Aggregate = None, include_svs: bool = True, variant_json_fields: Iterable[str] = None, variant_attr_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, include_no_individual_families: bool = False, omit_airtable: bool = False, include_family_name_display: bool = False, include_family_sample_metadata: bool = False, - include_discovery_sample_id: bool = False, include_mondo: bool = False, include_parent_mnvs: bool = False, + 
include_discovery_sample_id: bool = False, include_mondo: bool = False, omit_parent_mnvs: bool = False, proband_only_variants: bool = False): individual_samples = individual_samples or (_get_loaded_before_date_project_individual_samples(projects, max_loaded_date) \ @@ -253,7 +253,7 @@ def parse_anvil_metadata( continue discovery_row = _get_genetic_findings_rows( saved_variants, individual, participant_id=participant_id, - format_id=format_id, include_parent_mnvs=include_parent_mnvs, + format_id=format_id, omit_parent_mnvs=omit_parent_mnvs, individual_data_types=(individual_data_types or {}).get(participant_id), family_individuals=family_individuals if proband_only_variants else None, sample=sample if include_discovery_sample_id else None, @@ -305,9 +305,9 @@ def _get_genotype_zygosity(genotype): return None -def _get_discovery_notes(v, gene_variants, include_parent_mnvs): +def _get_discovery_notes(variant, gene_variants, omit_parent_mnvs): parent_mnv = next((v for v in gene_variants if len(v['individual_genotype']) == 1), gene_variants[0]) - if parent_mnv['genetic_findings_id'] == v['genetic_findings_id'] and not include_parent_mnvs: + if parent_mnv['genetic_findings_id'] == variant['genetic_findings_id'] and omit_parent_mnvs: return None variant_type = 'complex structural' if parent_mnv.get('svType') else 'multinucleotide' parent_name = _get_nested_variant_name(parent_mnv) @@ -470,7 +470,7 @@ def _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metad def _get_genetic_findings_rows(rows: list[dict], individual: Individual, participant_id: str, individual_data_types: Iterable[str], family_individuals: dict[str, str], post_process_variant: Callable[[dict, list[dict]], dict], - format_id: Callable[[str], str], include_parent_mnvs: bool, sample: Sample) -> list[dict]: + format_id: Callable[[str], str], omit_parent_mnvs: bool, sample: Sample) -> list[dict]: parsed_rows = [] variants_by_gene = defaultdict(list) for row in (rows or []): @@ -512,7 
+512,7 @@ def _get_genetic_findings_rows(rows: list[dict], individual: Individual, partici gene_variants = variants_by_gene[row[GENE_COLUMN]] discovery_notes = None if len(gene_variants) > 2: - discovery_notes = _get_discovery_notes(row, gene_variants, include_parent_mnvs) + discovery_notes = _get_discovery_notes(row, gene_variants, omit_parent_mnvs) if discovery_notes is None: to_remove.append(row) continue From 76379aa62c342c59c37ebac5547efc28bb21800d Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 14:43:50 -0400 Subject: [PATCH 405/736] revert --- requirements.in | 2 +- requirements.txt | 2 +- seqr/views/utils/terra_api_utils.py | 10 ++++++++-- settings.py | 1 - 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/requirements.in b/requirements.in index 602875d970..3ba3fe97b2 100644 --- a/requirements.in +++ b/requirements.in @@ -5,7 +5,7 @@ django-guardian # object-level permissions for database record django-hijack # allows admins to login as other user django-notifications-hq # notification app django-cors-headers # allows CORS requests for client-side development -django-storages[google]==1.11.1 # alternative GCS storage backend for the django media_root +django-storages[google]==1.14.3 # alternative GCS storage backend for the django media_root social-auth-app-django # the package for Django to authenticate users with social medieas social-auth-core # the Python social authentication package. 
Required by social-auth-app-django elasticsearch==7.9.1 # elasticsearch client diff --git a/requirements.txt b/requirements.txt index f099f1b249..5066b63a47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -52,7 +52,7 @@ django-model-utils==4.3.1 # via django-notifications-hq django-notifications-hq==1.8.3 # via -r requirements.in -django-storages[google]==1.11.1 +django-storages[google]==1.14.3 # via -r requirements.in elasticsearch==7.9.1 # via diff --git a/seqr/views/utils/terra_api_utils.py b/seqr/views/utils/terra_api_utils.py index 4a81f15c18..314bbc4fec 100644 --- a/seqr/views/utils/terra_api_utils.py +++ b/seqr/views/utils/terra_api_utils.py @@ -103,8 +103,14 @@ def _safe_get_social(user): if not google_auth_enabled() or not hasattr(user, 'social_auth'): return None - social = user.social_auth.filter(provider=SOCIAL_AUTH_PROVIDER) - return social.first() if social else None + social_auth = user.social_auth.filter(provider=SOCIAL_AUTH_PROVIDER) + if not social_auth: + return None + + social = social_auth.first() + if isinstance(social.extra_data, str): # JSONField extra_data is returned as a string + social.extra_data = json.loads(social.extra_data) + return social def _get_social_access_token(user): diff --git a/settings.py b/settings.py index 9c3a07e613..a3e7e502f5 100644 --- a/settings.py +++ b/settings.py @@ -376,7 +376,6 @@ ######################################################### # Social auth specific settings ######################################################### -SOCIAL_AUTH_JSONFIELD_ENABLED = True SOCIAL_AUTH_GOOGLE_OAUTH2_IGNORE_DEFAULT_SCOPE = True SOCIAL_AUTH_GOOGLE_OAUTH2_SCOPE = [ 'https://www.googleapis.com/auth/userinfo.profile', From bf6efba817f0edc193f08afef1fa9b5ab28008c1 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 14:53:17 -0400 Subject: [PATCH 406/736] try pinning sqlparse --- requirements.in | 1 + requirements.txt | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.in 
b/requirements.in index 3ba3fe97b2..5fda7b8d07 100644 --- a/requirements.in +++ b/requirements.in @@ -8,6 +8,7 @@ django-cors-headers # allows CORS requests for client-side develop django-storages[google]==1.14.3 # alternative GCS storage backend for the django media_root social-auth-app-django # the package for Django to authenticate users with social medieas social-auth-core # the Python social authentication package. Required by social-auth-app-django +sqlparse==0.5.0 # required by Django, pinned to version without vulnerabilities elasticsearch==7.9.1 # elasticsearch client elasticsearch-dsl==7.2.1 # elasticsearch query utilities gunicorn # web server diff --git a/requirements.txt b/requirements.txt index 5066b63a47..1c3db4627c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -166,7 +166,9 @@ social-auth-core==4.3.0 soupsieve==2.5 # via beautifulsoup4 sqlparse==0.5.0 - # via django + # via + # -r requirements.in + # django swapper==1.3.0 # via django-notifications-hq tenacity==8.3.0 From ae5fa255ecfc91c4abe017d56664b710f35a9b8e Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 1 Jul 2024 14:54:52 -0400 Subject: [PATCH 407/736] revert --- requirements.in | 1 - requirements.txt | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/requirements.in b/requirements.in index 5fda7b8d07..3ba3fe97b2 100644 --- a/requirements.in +++ b/requirements.in @@ -8,7 +8,6 @@ django-cors-headers # allows CORS requests for client-side develop django-storages[google]==1.14.3 # alternative GCS storage backend for the django media_root social-auth-app-django # the package for Django to authenticate users with social medieas social-auth-core # the Python social authentication package. 
Required by social-auth-app-django -sqlparse==0.5.0 # required by Django, pinned to version without vulnerabilities elasticsearch==7.9.1 # elasticsearch client elasticsearch-dsl==7.2.1 # elasticsearch query utilities gunicorn # web server diff --git a/requirements.txt b/requirements.txt index 1c3db4627c..5066b63a47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -166,9 +166,7 @@ social-auth-core==4.3.0 soupsieve==2.5 # via beautifulsoup4 sqlparse==0.5.0 - # via - # -r requirements.in - # django + # via django swapper==1.3.0 # via django-notifications-hq tenacity==8.3.0 From 0c6df993109ad9f38fef5a680c8a52eb6fc16517 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 1 Jul 2024 15:47:04 -0400 Subject: [PATCH 408/736] add publication summary to discovery notes --- seqr/views/apis/report_api_tests.py | 39 ++++++++++++++++++++--- seqr/views/apis/summary_data_api_tests.py | 6 ++-- seqr/views/utils/anvil_metadata_utils.py | 12 ++++--- 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index c91bb4e3cf..5b074c0be9 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -504,7 +504,7 @@ 'hgvsc': '', 'hgvsp': '', 'method_of_discovery': 'SR-ES', - 'notes': None, + 'notes': '', 'phenotype_contribution': 'Full', 'partial_contribution_explained': '', 'seqr_chosen_consequence': None, @@ -603,7 +603,7 @@ 'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'SNV/INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '', 'RP11', 'ENST00000258436.5', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Candidate', 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', 'OMIM:615120', 'Autosomal recessive|X-linked', - 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', + 'Full', '', '', 'SR-ES', 'This individual is published in PMID34415322', '', '', '', '', '', '', ], [ 'Broad_HG00731_1_248367227', 'Broad_HG00731', 
'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', @@ -746,7 +746,7 @@ def test_anvil_export(self, mock_google_authenticated, mock_zip): self.assertIn([ '21_3343353_NA19675_1', 'NA19675_1', 'NA19675', 'RP11', 'Candidate', 'de novo', 'Heterozygous', 'GRCh37', '21', '3343353', 'GAGA', 'G', 'c.375_377delTCT', 'p.Leu126del', 'ENST00000258436.5', - '-', '-', '-', '-'], discovery_file) + '-', '-', '-', 'This individual is published in PMID34415322'], discovery_file) self.assertIn([ '19_1912633_HG00731', 'HG00731', 'HG00731', 'OR4G11P', 'Known', 'unknown', 'Heterozygous', 'GRCh38', '19', '1912633', 'G', 'T', '-', '-', 'ENST00000371839', '-', '-', '-', @@ -906,7 +906,8 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat 'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'SNV/INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '', 'RP11', 'ENST00000258436.5', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Candidate', 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', 'OMIM:615120', - 'Autosomal recessive|X-linked', 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', + 'Autosomal recessive|X-linked', 'Full', '', '', 'SR-ES', 'This individual is published in PMID34415322', + '', '', '', '', '', '', ], [ 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', @@ -1259,6 +1260,36 @@ def test_variant_metadata(self): self.assertListEqual(list(response_json.keys()), ['rows']) row_ids = ['NA19675_1_21_3343353', 'HG00731_1_248367227', 'HG00731_19_1912634', 'HG00731_19_1912633', 'HG00731_19_1912632'] self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) + 
self.assertDictEqual(response_json['rows'][0], { + **BASE_VARIANT_METADATA_ROW, + 'alt': 'G', + 'chrom': '21', + 'clinvar': {'alleleId': None, 'clinicalSignificance': '', 'goldStars': None, 'variationId': None}, + 'condition_id': 'OMIM:615120', + 'condition_inheritance': 'Autosomal recessive|X-linked', + 'displayName': '1', + 'familyGuid': 'F000001_1', + 'family_id': '1', + 'gene_of_interest': 'RP11', + 'gene_id': 'ENSG00000135953', + 'gene_known_for_phenotype': 'Candidate', + 'genetic_findings_id': 'NA19675_1_21_3343353', + 'hgvsc': 'c.375_377delTCT', + 'hgvsp': 'p.Leu126del', + 'known_condition_name': 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', + 'MME': True, + 'notes': 'This individual is published in PMID34415322', + 'participant_id': 'NA19675_1', + 'pos': 3343353, + 'projectGuid': 'R0001_1kg', + 'ref': 'GAGA', + 'seqr_chosen_consequence': 'inframe_deletion', + 'tags': ['Tier 1 - Novel gene and phenotype'], + 'transcript': 'ENST00000258436.5', + 'variant_inheritance': 'de novo', + 'variant_reference_assembly': 'GRCh37', + 'zygosity': 'Heterozygous', + }) expected_row = { **BASE_VARIANT_METADATA_ROW, 'additional_family_members_with_variant': 'HG00732', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 835c9ac946..350e218dd6 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -98,8 +98,8 @@ 'svType-1': None, 'sv_name-1': None, 'end-1': None, - 'notes-1': None, - 'notes-2': None, + 'notes-1': '', + 'notes-2': '', 'phenotype_contribution-1': 'Partial', 'phenotype_contribution-2': 'Full', 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', @@ -161,7 +161,7 @@ 'gene_id-1': None, 'hgvsc-1': '', 'hgvsp-1': '', - 'notes-1': None, + 'notes-1': '', 'seqr_chosen_consequence-1': None, 'svType-1': None, 'sv_name-1': None, diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 
2bb27b54f4..2f47aca5d0 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -252,7 +252,7 @@ def parse_anvil_metadata( if proband_only_variants and individual.proband_relationship != Individual.SELF_RELATIONSHIP: continue discovery_row = _get_genetic_findings_rows( - saved_variants, individual, participant_id=participant_id, + saved_variants, individual, subject_family_row, participant_id=participant_id, format_id=format_id, omit_parent_mnvs=omit_parent_mnvs, individual_data_types=(individual_data_types or {}).get(participant_id), family_individuals=family_individuals if proband_only_variants else None, @@ -467,7 +467,7 @@ def _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metad return sample_row -def _get_genetic_findings_rows(rows: list[dict], individual: Individual, participant_id: str, +def _get_genetic_findings_rows(rows: list[dict], individual: Individual, family_row: dict, participant_id: str, individual_data_types: Iterable[str], family_individuals: dict[str, str], post_process_variant: Callable[[dict, list[dict]], dict], format_id: Callable[[str], str], omit_parent_mnvs: bool, sample: Sample) -> list[dict]: @@ -510,13 +510,17 @@ def _get_genetic_findings_rows(rows: list[dict], individual: Individual, partici del row['genotypes'] gene_variants = variants_by_gene[row[GENE_COLUMN]] - discovery_notes = None + notes = [] if len(gene_variants) > 2: discovery_notes = _get_discovery_notes(row, gene_variants, omit_parent_mnvs) if discovery_notes is None: to_remove.append(row) continue - row['notes'] = discovery_notes + else: + notes.append(discovery_notes) + if family_row['pmid_id']: + notes.append(f'This individual is published in PMID{family_row["pmid_id"]}') + row['notes'] = '. 
'.join(notes) if post_process_variant: row.update(post_process_variant(row, gene_variants)) From d6546949db1dc342360e8fd7a747ca2da60ff401 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 2 Jul 2024 12:25:43 -0400 Subject: [PATCH 409/736] add seqr notification to rna data loading --- seqr/views/apis/data_manager_api_tests.py | 39 ++++++++++++++--------- seqr/views/utils/dataset_utils.py | 21 +++++++++--- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index f2d53ef5c3..34ffec4b53 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -16,7 +16,6 @@ from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier, Sample, Project, PhenotypePrioritization from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL -SEQR_URL = 'https://seqr.broadinstitute.org/' PROJECT_GUID = 'R0001_1kg' ES_CAT_ALLOCATION=[{ @@ -873,6 +872,7 @@ def test_update_rna_splice_outlier(self, *args, **kwargs): @mock.patch('seqr.views.utils.dataset_utils.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.views.utils.dataset_utils.SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL', 'seqr-data-loading') @mock.patch('seqr.views.utils.file_utils.tempfile.gettempdir', lambda: 'tmp/') + @mock.patch('seqr.utils.communication_utils.send_html_email') @mock.patch('seqr.views.utils.dataset_utils.safe_post_to_slack') @mock.patch('seqr.views.apis.data_manager_api.datetime') @mock.patch('seqr.views.apis.data_manager_api.os.mkdir') @@ -881,7 +881,7 @@ def test_update_rna_splice_outlier(self, *args, **kwargs): @mock.patch('seqr.utils.file_utils.subprocess.Popen') @mock.patch('seqr.views.apis.data_manager_api.gzip.open') def _test_update_rna_seq(self, data_type, mock_open, mock_subprocess, mock_load_uploaded_file, - mock_rename, mock_mkdir, mock_datetime, mock_send_slack): + mock_rename, mock_mkdir, mock_datetime, mock_send_slack, 
mock_send_email): url = reverse(update_rna_seq) self.check_pm_login(url) @@ -966,6 +966,7 @@ def _set_file_iter_stdout(rows): self._has_expected_file_loading_logs('gs://rna_data/muscle_samples.tsv.gz', info=info, warnings=warnings, user=self.pm_user) self.assertEqual(model_cls.objects.count(), params['initial_model_count']) mock_send_slack.assert_not_called() + mock_send_email.assert_not_called() self.assertEqual(mock_subprocess.call_count, 2) mock_subprocess.assert_has_calls([mock.call(command, stdout=-1, stderr=-2, shell=True) for command in [ # nosec f'gsutil ls {body["file"]}', @@ -1031,13 +1032,21 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s mock_send_slack.assert_has_calls([ mock.call( 'seqr-data-loading', - f'0 new RNA {params["message_data_type"]} samples are loaded in \n``````', + f'0 new RNA {params["message_data_type"]} samples are loaded in 1kg project nåme with uniçøde\n``````', ), mock.call( 'seqr-data-loading', - f'1 new RNA {params["message_data_type"]} samples are loaded in \n```NA20888```', + f'1 new RNA {params["message_data_type"]} samples are loaded in Test Reprocessed Project\n```NA20888```', ), ]) + self.assertEqual(mock_send_email.call_count, 2) + self._assert_expected_notifications(mock_send_email, [ + {'data_type': f'RNA {params["message_data_type"]}', 'user': self.data_manager_user, + 'email_body': f'data for 0 new RNA {params["message_data_type"]} sample(s)'}, + {'data_type': f'RNA {params["message_data_type"]}', 'user': self.data_manager_user, + 'email_body': f'data for 1 new RNA {params["message_data_type"]} sample(s)', + 'project_guid': 'R0003_test', 'project_name': 'Test Reprocessed Project'} + ]) # test database models are correct self.assertEqual(model_cls.objects.count(), params['initial_model_count'] - deleted_count) @@ -1187,7 +1196,7 @@ def test_load_rna_seq_sample_data(self): def _join_data(cls, data): return ['\t'.join(line).encode('utf-8') for line in data] - 
@mock.patch('seqr.views.apis.data_manager_api.BASE_URL', SEQR_URL) + @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.models.random') @mock.patch('seqr.utils.communication_utils.send_html_email') @mock.patch('seqr.utils.file_utils.subprocess.Popen') @@ -1270,8 +1279,8 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema self.assertListEqual(saved_data, EXPECTED_LIRICAL_DATA) mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) # nosec self._assert_expected_notifications(mock_send_email, [ - {'tool': 'lirical', 'num_samples': 1, 'user': self.data_manager_user}, - {'tool': 'lirical', 'num_samples': 1, 'user': self.data_manager_user, + {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 1 sample(s)'}, + {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 1 sample(s)', 'project_guid': 'R0003_test', 'project_name': 'Test Reprocessed Project'} ]) @@ -1301,7 +1310,7 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema nested_fields=[{'fields': ('individual', 'guid'), 'key': 'individualGuid'}]) self.assertListEqual(saved_data, EXPECTED_UPDATED_LIRICAL_DATA) self._assert_expected_notifications(mock_send_email, [ - {'tool': 'lirical', 'num_samples': 2, 'user': self.data_manager_user}, + {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 2 sample(s)'}, ]) @staticmethod @@ -1310,16 +1319,16 @@ def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict]) for notif_dict in expected_notifs: project_guid = notif_dict.get('project_guid', PROJECT_GUID) project_name = notif_dict.get('project_name', '1kg project nåme with uniçøde') - url = f'{SEQR_URL}project/{project_guid}/project_page' + url = f'https://test-seqr.org/project/{project_guid}/project_page' 
project_link = f'{project_name}' - email = ( - f'This is to notify you that {notif_dict["tool"].title()} data for {notif_dict["num_samples"]} sample(s) ' - f'has been loaded in seqr project {project_link}' + expected_email_body = ( + f'Dear seqr user,\n\nThis is to notify you that {notif_dict["email_body"]} ' + f'has been loaded in seqr project {project_link}\n\nAll the best,\nThe seqr team' ) calls.append( mock.call( - email_body=f'Dear seqr user,\n\n{email}\n\nAll the best,\nThe seqr team', - subject=f'New {notif_dict["tool"].title()} data available in seqr', + email_body=expected_email_body, + subject=f'New {notif_dict["data_type"]} data available in seqr', to=['test_user_manager@test.com'], process_message=_set_bulk_notification_stream, ) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 4fa33c2048..f49eb1f086 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -5,7 +5,7 @@ from tqdm import tqdm from seqr.models import Sample, Individual, Family, Project, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier -from seqr.utils.communication_utils import safe_post_to_slack +from seqr.utils.communication_utils import safe_post_to_slack, send_project_notification from seqr.utils.file_utils import file_iter from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException @@ -519,7 +519,7 @@ def match_sample(sample_key, unmatched_samples, sample_id_to_individual_id_mappi info.append(message) logger.info(message, user) - _notify_rna_loading(model_cls, sample_projects) + _notify_rna_loading(model_cls, sample_projects, projects) for warning in warnings: logger.warning(warning, user) @@ -564,15 +564,28 @@ def post_process_rna_data(sample_guid, data, get_unique_key=None, format_fields= RnaSeqTpm: 'Expression', } -def _notify_rna_loading(model_cls, sample_projects): + +def _notify_rna_loading(model_cls, sample_projects, internal_projects): + projects_by_name = 
{project.name: project for project in internal_projects} data_type = RNA_MODEL_DISPLAY_NAME[model_cls] for project_agg in sample_projects: new_ids = project_agg["new_sample_ids"] - project_link = f'<{BASE_URL}project/{project_agg["guid"]}/project_page|{project_agg["name"]}>' + url = f'{BASE_URL}project/{project_agg["guid"]}/project_page' + project_link = f'{project_agg["name"]}' safe_post_to_slack( SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, f'{len(new_ids)} new RNA {data_type} samples are loaded in {project_link}\n```{", ".join(new_ids)}```' ) + email = ( + f'This is to notify you that data for {len(new_ids)} new RNA {data_type} sample(s) ' + f'has been loaded in seqr project {project_link}' + ) + send_project_notification( + project=projects_by_name[project_agg["name"]], + notification=f'Loaded {len(new_ids)} new RNA {data_type} sample(s)', + email=email, + subject=f'New RNA {data_type} data available in seqr', + ) PHENOTYPE_PRIORITIZATION_HEADER = ['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName'] From 32245d984d03a246c37684969b3e2e4dd153b395 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 2 Jul 2024 12:46:50 -0400 Subject: [PATCH 410/736] be more lax with requirements --- requirements-dev.in | 10 +++++----- requirements-dev.txt | 2 +- requirements.in | 4 ++-- requirements.txt | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements-dev.in b/requirements-dev.in index 7d689c6013..f85a139ed5 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -1,8 +1,8 @@ -c requirements.txt # use the generated reqs as a constraint coverage<5.2 django-compressor -django-debug-toolbar==4.4.2 # https://github.com/jazzband/django-debug-toolbar -mock # mock objects for unit tests -pip-tools # tool for managing our python dependency tree -responses # mock HTTP responses for unit tests -urllib3-mock # mock urllib3 for tests +django-debug-toolbar # https://github.com/jazzband/django-debug-toolbar +mock # mock objects 
for unit tests +pip-tools # tool for managing our python dependency tree +responses # mock HTTP responses for unit tests +urllib3-mock # mock urllib3 for tests diff --git a/requirements-dev.txt b/requirements-dev.txt index a1399355e1..5ef5890f4a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -31,7 +31,7 @@ django-appconf==1.0.5 # via django-compressor django-compressor==4.3.1 # via -r requirements-dev.in -django-debug-toolbar==4.4.2 +django-debug-toolbar==3.2.4 # via -r requirements-dev.in idna==3.7 # via diff --git a/requirements.in b/requirements.in index 3ba3fe97b2..e38886e965 100644 --- a/requirements.in +++ b/requirements.in @@ -1,11 +1,11 @@ -Django==4.2.13 # core server-side framework +Django>=4.2,<4.3 # core server-side framework django-anymail # for sending emails using cloud-based mail service providers django-csp # for setting CSP headers django-guardian # object-level permissions for database records. Behind a major version due to missing Python 2 support django-hijack # allows admins to login as other user django-notifications-hq # notification app django-cors-headers # allows CORS requests for client-side development -django-storages[google]==1.14.3 # alternative GCS storage backend for the django media_root +django-storages[google] # alternative GCS storage backend for the django media_root social-auth-app-django # the package for Django to authenticate users with social medieas social-auth-core # the Python social authentication package. 
Required by social-auth-app-django elasticsearch==7.9.1 # elasticsearch client diff --git a/requirements.txt b/requirements.txt index 5066b63a47..f099f1b249 100644 --- a/requirements.txt +++ b/requirements.txt @@ -52,7 +52,7 @@ django-model-utils==4.3.1 # via django-notifications-hq django-notifications-hq==1.8.3 # via -r requirements.in -django-storages[google]==1.14.3 +django-storages[google]==1.11.1 # via -r requirements.in elasticsearch==7.9.1 # via From b4777d4bc9cd575e00134ae8d338e200a90a8230 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 2 Jul 2024 13:57:36 -0400 Subject: [PATCH 411/736] be explicit abuot storages --- settings.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/settings.py b/settings.py index bec2499f88..0679ec0cb6 100644 --- a/settings.py +++ b/settings.py @@ -136,16 +136,16 @@ 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', ) +STORAGES = { + 'default': {'BACKEND': 'django.core.files.storage.FileSystemStorage'}, + 'staticfiles': {'BACKEND': 'django.contrib.staticfiles.storage.StaticFilesStorage'} +} # If specified, store data in the named GCS bucket and use the gcloud storage backend. # Else, fall back to a path on the local filesystem. 
GCS_MEDIA_ROOT_BUCKET = os.environ.get('GCS_MEDIA_ROOT_BUCKET') if GCS_MEDIA_ROOT_BUCKET: - STORAGES = { - "default": { - "BACKEND": "storages.backends.gcloud.GoogleCloudStorage", - } - } + STORAGES['default'] = {'BACKEND': 'storages.backends.gcloud.GoogleCloudStorage'} GS_BUCKET_NAME = GCS_MEDIA_ROOT_BUCKET GS_DEFAULT_ACL = 'publicRead' MEDIA_ROOT = False From 813a379878950d0f4ac0f6dae845a68033e366ae Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 2 Jul 2024 14:34:26 -0400 Subject: [PATCH 412/736] bump social auth and fix fixtures --- requirements.in | 2 +- requirements.txt | 7 ++--- seqr/fixtures/social_auth.json | 42 ++++++++++++++++++++++++----- seqr/views/utils/terra_api_utils.py | 10 +------ settings.py | 1 + 5 files changed, 43 insertions(+), 19 deletions(-) diff --git a/requirements.in b/requirements.in index e38886e965..b40fcdafc4 100644 --- a/requirements.in +++ b/requirements.in @@ -6,7 +6,7 @@ django-hijack # allows admins to login as other user django-notifications-hq # notification app django-cors-headers # allows CORS requests for client-side development django-storages[google] # alternative GCS storage backend for the django media_root -social-auth-app-django # the package for Django to authenticate users with social medieas +social-auth-app-django>5.0.0 # the package for Django to authenticate users with social medieas social-auth-core # the Python social authentication package. 
Required by social-auth-app-django elasticsearch==7.9.1 # elasticsearch client elasticsearch-dsl==7.2.1 # elasticsearch query utilities diff --git a/requirements.txt b/requirements.txt index f099f1b249..4abc425ee9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,6 +38,7 @@ django==4.2.13 # django-notifications-hq # django-storages # jsonfield + # social-auth-app-django django-anymail==9.0 # via -r requirements.in django-cors-headers==3.13.0 @@ -117,7 +118,7 @@ pyasn1-modules==0.2.8 # via google-auth pycparser==2.21 # via cffi -pyjwt==2.6.0 +pyjwt==2.8.0 # via social-auth-core pyliftover==0.4 # via -r requirements.in @@ -157,9 +158,9 @@ slacker==0.14.0 # via -r requirements.in slugify==0.0.1 # via -r requirements.in -social-auth-app-django==5.0.0 +social-auth-app-django==5.4.1 # via -r requirements.in -social-auth-core==4.3.0 +social-auth-core==4.5.4 # via # -r requirements.in # social-auth-app-django diff --git a/seqr/fixtures/social_auth.json b/seqr/fixtures/social_auth.json index 7d951c13ef..e092f257fa 100644 --- a/seqr/fixtures/social_auth.json +++ b/seqr/fixtures/social_auth.json @@ -6,7 +6,12 @@ "user": 10, "provider": "google-oauth2", "uid": "test_user@broadinstitute.org", - "extra_data": "{\"expires\": 3599, \"auth_time\": 1603287741, \"token_type\": \"Bearer\", \"access_token\": \"ya29.EXAMPLE\"}", + "extra_data": { + "expires": 3599, + "auth_time": 1603287741, + "token_type": "Bearer", + "access_token": "ya29.EXAMPLE" + }, "created": "2020-03-12T23:09:54.180Z", "modified": "2020-03-12T23:09:54.180Z" } @@ -17,7 +22,12 @@ "user": 11, "provider": "google-oauth2", "uid": "test_user_manager@test.com", - "extra_data": "{\"expires\": 3599, \"auth_time\": 1603287741, \"token_type\": \"Bearer\", \"access_token\": \"ya29.EXAMPLE\"}", + "extra_data": { + "expires": 3599, + "auth_time": 1603287741, + "token_type": "Bearer", + "access_token": "ya29.EXAMPLE" + }, "created": "2020-03-12T23:09:54.180Z", "modified": "2020-03-12T23:09:54.180Z" } @@ -28,7 +38,12 
@@ "user": 12, "provider": "google-oauth2", "uid": "test_user_no_staff@test.com", - "extra_data": "{\"expires\": 3599, \"auth_time\": 1603287741, \"token_type\": \"Bearer\", \"access_token\": \"ya29.EXAMPLE\"}", + "extra_data": { + "expires": 6666, + "auth_time": 1603287741, + "token_type": "Bearer", + "access_token": "ya29.EXAMPLE" + }, "created": "2020-03-12T23:09:54.180Z", "modified": "2020-03-12T23:09:54.180Z" } @@ -39,7 +54,12 @@ "user": 13, "provider": "google-oauth2", "uid": "test_user_no_access@test.com", - "extra_data": "{\"expires\": 3599, \"auth_time\": 1603287741, \"token_type\": \"Bearer\", \"access_token\": \"ya29.EXAMPLE\"}", + "extra_data": { + "expires": 3599, + "auth_time": 1603287741, + "token_type": "Bearer", + "access_token": "ya29.EXAMPLE" + }, "created": "2020-03-12T23:09:54.180Z", "modified": "2020-03-12T23:09:54.180Z" } @@ -50,7 +70,12 @@ "user": 17, "provider": "google-oauth2", "uid": "test_pm_user@test.com", - "extra_data": "{\"expires\": 3599, \"auth_time\": 1603287741, \"token_type\": \"Bearer\", \"access_token\": \"ya29.EXAMPLE\"}", + "extra_data": { + "expires": 3599, + "auth_time": 1603287741, + "token_type": "Bearer", + "access_token": "ya29.EXAMPLE" + }, "created": "2020-03-12T23:09:54.180Z", "modified": "2020-03-12T23:09:54.180Z" } @@ -61,7 +86,12 @@ "user": 15, "provider": "google-oauth2", "uid": "test_superuser@test.com", - "extra_data": "{\"expires\": 3599, \"auth_time\": 1603287741, \"token_type\": \"Bearer\", \"access_token\": \"ya29.EXAMPLE\"}", + "extra_data": { + "expires": 3599, + "auth_time": 1603287741, + "token_type": "Bearer", + "access_token": "ya29.EXAMPLE" + }, "created": "2020-03-12T23:09:54.180Z", "modified": "2020-03-12T23:09:54.180Z" } diff --git a/seqr/views/utils/terra_api_utils.py b/seqr/views/utils/terra_api_utils.py index 314bbc4fec..9ec6427254 100644 --- a/seqr/views/utils/terra_api_utils.py +++ b/seqr/views/utils/terra_api_utils.py @@ -102,15 +102,7 @@ def _get_call_args(path, headers=None, 
root_url=None): def _safe_get_social(user): if not google_auth_enabled() or not hasattr(user, 'social_auth'): return None - - social_auth = user.social_auth.filter(provider=SOCIAL_AUTH_PROVIDER) - if not social_auth: - return None - - social = social_auth.first() - if isinstance(social.extra_data, str): # JSONField extra_data is returned as a string - social.extra_data = json.loads(social.extra_data) - return social + return user.social_auth.filter(provider=SOCIAL_AUTH_PROVIDER).first() def _get_social_access_token(user): diff --git a/settings.py b/settings.py index 0679ec0cb6..a0b1fc001d 100644 --- a/settings.py +++ b/settings.py @@ -378,6 +378,7 @@ ######################################################### # Social auth specific settings ######################################################### +SOCIAL_AUTH_JSONFIELD_ENABLED = True SOCIAL_AUTH_GOOGLE_OAUTH2_IGNORE_DEFAULT_SCOPE = True SOCIAL_AUTH_GOOGLE_OAUTH2_SCOPE = [ 'https://www.googleapis.com/auth/userinfo.profile', From 27698f536b5c80e75b674cc06488d91beff95b67 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 3 Jul 2024 11:39:38 -0400 Subject: [PATCH 413/736] lint docker compose (#4213) --- .github/workflows/docker-lint.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker-lint.yaml b/.github/workflows/docker-lint.yaml index ab1399ba69..a0b1e5de9f 100644 --- a/.github/workflows/docker-lint.yaml +++ b/.github/workflows/docker-lint.yaml @@ -11,6 +11,7 @@ on: - deploy/docker/seqr/Dockerfile - hail_search/deploy/Dockerfile - .hadolint.yaml + - .docker-compose.yaml - .github/workflows/docker-lint.yaml pull_request: types: [opened, synchronize, reopened] @@ -21,13 +22,16 @@ on: - deploy/docker/seqr/Dockerfile - hail_search/deploy/Dockerfile - .hadolint.yaml + - .docker-compose.yaml - .github/workflows/docker-lint.yaml jobs: hadolint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v2 + - name: Validate docker 
compose + run: docker-compose -f docker-compose.yml config - uses: hadolint/hadolint-action@v1.5.0 with: dockerfile: deploy/docker/seqr/Dockerfile From d55aa3b859f7916716f94a97868f9a76b8d7d7b0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 3 Jul 2024 14:44:54 -0400 Subject: [PATCH 414/736] update display for extended splice region --- .../components/panel/variants/Annotations.jsx | 10 ++++++++-- .../components/panel/variants/Transcripts.jsx | 14 ++++---------- ui/shared/utils/constants.js | 4 +++- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 5581233aca..8ce120aca1 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -22,7 +22,7 @@ import Modal from '../../modal/Modal' import { ButtonLink, HelpIcon } from '../../StyledComponents' import RnaSeqJunctionOutliersTable from '../../table/RnaSeqJunctionOutliersTable' import { getOtherGeneNames } from '../genes/GeneDetail' -import Transcripts, { ConsequenceDetails, ExtendedSpliceLabel, isManeSelect } from './Transcripts' +import Transcripts, { ConsequenceDetails, isManeSelect } from './Transcripts' import VariantGenes, { GeneLabelContent, omimPhenotypesDetail } from './VariantGene' import { getLocus, @@ -35,6 +35,7 @@ import { } from './VariantUtils' import { GENOME_VERSION_37, GENOME_VERSION_38, getVariantMainTranscript, SVTYPE_LOOKUP, SVTYPE_DETAILS, SCREEN_LABELS, + EXTENDED_INTRONIC_DESCRIPTION, } from '../../../utils/constants' import { camelcaseToTitlecase } from '../../../utils/stringUtils' @@ -639,7 +640,12 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} - + {mainTranscript.spliceregion?.extended_intronic_splice_region_variant && ( +
+ Extended Intronic Splice Region + } content={EXTENDED_INTRONIC_DESCRIPTION} /> +
+ )} {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 0feee268c7..f0e894e127 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -156,19 +156,13 @@ const transcriptIdDetails = (transcript, variant, { transcriptsById, project, up
) -export const ExtendedSpliceLabel = ({ spliceregion }) => (spliceregion?.extended_intronic_splice_region_variant ? ( -
), }, + }, { + [ENGLISH]: { + header: 'Q. Which browsers are supported for seqr?', + content: `seqr is only supported in Google Chrome. While it may sometimes function in other browsers, to ensure + reliable behavior you should only use seqr in Chrome`, + }, + [SPANISH]: { + header: 'P: ¿Cuáles navegadores son compatibles con seqr?', + content: `seqr solamente es compatible con Google Chrome. Aunque a veces puede funcionar en otros navegadores, + para garantizar un funcionamiento fiable sólo debe usar seqr en Chrome.`, + }, }, { [ENGLISH]: { header: 'Q. How can I set up seqr locally?', From 0c96bc36400ee7fab6f15f80d4fef9bfb4734a14 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 31 Jul 2024 11:52:03 -0400 Subject: [PATCH 535/736] shared saved variant model util --- .../check_for_new_samples_from_pipeline.py | 10 +++++----- seqr/views/utils/variant_utils.py | 17 ++++++++++++++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index c530eef6b6..bc68c142ba 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -15,7 +15,7 @@ from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type from seqr.views.utils.dataset_utils import match_and_update_search_samples from seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ - saved_variants_dataset_type_filter + get_saved_variants from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL logger = logging.getLogger(__name__) @@ -153,10 +153,10 @@ def _reload_shared_variant_annotations(data_type, genome_version, updated_varian if is_sv: updated_annotation_samples = updated_annotation_samples.filter(sample_type=data_type.split('_')[1]) - variant_models = SavedVariant.objects.filter( - 
family_id__in=updated_annotation_samples.values_list('individual__family', flat=True).distinct(), - **saved_variants_dataset_type_filter(dataset_type), - ).filter(Q(saved_variant_json__genomeVersion__isnull=True) | Q(saved_variant_json__genomeVersion=db_genome_version)) + variant_models = get_saved_variants( + family_guids=updated_annotation_samples.values_list('individual__family__guid', flat=True).distinct(), + dataset_type=dataset_type, genome_version=genome_version, + ) if not variant_models: logger.info('No additional saved variants to update') diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 17d1f7e36a..5015c78f3a 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -66,13 +66,24 @@ def update_projects_saved_variant_json(projects, user_email, **kwargs): return updated_variants_by_id -def update_project_saved_variant_json(project_id, family_guids=None, dataset_type=None, user=None, user_email=None): - saved_variants = SavedVariant.objects.filter(family__project_id=project_id).select_related('family') +def get_saved_variants(project_id=None, family_guids=None, dataset_type=None, genome_version=None): + saved_variants = SavedVariant.objects.all() + if project_id: + saved_variants = saved_variants.filter(family__project_id=project_id) if family_guids: saved_variants = saved_variants.filter(family__guid__in=family_guids) - if dataset_type: saved_variants = saved_variants.filter(**saved_variants_dataset_type_filter(dataset_type)) + if genome_version: + db_genome_version = genome_version.replace('GRCh', '') + saved_variants = saved_variants.filter( + Q(saved_variant_json__genomeVersion__isnull=True) | Q(saved_variant_json__genomeVersion=db_genome_version) + ) + return saved_variants + + +def update_project_saved_variant_json(project_id, family_guids=None, dataset_type=None, user=None, user_email=None): + saved_variants = get_saved_variants(project_id, family_guids, 
dataset_type).select_related('family') if not saved_variants: return None From 3523d98e666848e0604c60f62ae8427f8d4fdf05 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 31 Jul 2024 12:02:45 -0400 Subject: [PATCH 536/736] only reload variants on project genome build --- .../check_for_new_samples_from_pipeline.py | 4 ++-- .../commands/reload_saved_variant_json.py | 2 +- seqr/views/apis/saved_variant_api.py | 2 +- seqr/views/utils/variant_utils.py | 20 +++++++++---------- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index bc68c142ba..17824d8114 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -108,7 +108,7 @@ def handle(self, *args, **options): ) project_families = project_sample_data['family_guids'] updated_families.update(project_families) - updated_project_families.append((project.id, project.name, project_families)) + updated_project_families.append((project.id, project.name, project.genome_version, project_families)) # Send failure notifications failed_family_samples = metadata.get('failed_family_samples', {}) @@ -154,8 +154,8 @@ def _reload_shared_variant_annotations(data_type, genome_version, updated_varian updated_annotation_samples = updated_annotation_samples.filter(sample_type=data_type.split('_')[1]) variant_models = get_saved_variants( + genome_version, dataset_type=dataset_type, family_guids=updated_annotation_samples.values_list('individual__family__guid', flat=True).distinct(), - dataset_type=dataset_type, genome_version=genome_version, ) if not variant_models: diff --git a/seqr/management/commands/reload_saved_variant_json.py b/seqr/management/commands/reload_saved_variant_json.py index 2e83305eaa..eea208cf32 100644 --- a/seqr/management/commands/reload_saved_variant_json.py +++ 
b/seqr/management/commands/reload_saved_variant_json.py @@ -27,6 +27,6 @@ def handle(self, *args, **options): logging.info("Processing all %s projects" % len(projects)) family_ids = [family_guid] if family_guid else None - project_list = [(*project, family_ids) for project in projects.values_list('id', 'name')] + project_list = [(*project, family_ids) for project in projects.values_list('id', 'name', 'genome_version')] update_projects_saved_variant_json(project_list, user_email='manage_command') logger.info("Done") diff --git a/seqr/views/apis/saved_variant_api.py b/seqr/views/apis/saved_variant_api.py index 9220d0cbeb..18740ba3b9 100644 --- a/seqr/views/apis/saved_variant_api.py +++ b/seqr/views/apis/saved_variant_api.py @@ -303,7 +303,7 @@ def update_saved_variant_json(request, project_guid): project = get_project_and_check_permissions(project_guid, request.user, can_edit=True) reset_cached_search_results(project) try: - updated_saved_variant_guids = update_project_saved_variant_json(project.id, user=request.user) + updated_saved_variant_guids = update_project_saved_variant_json(project.id, project.genome_version, user=request.user) except Exception as e: logger.error('Unable to reset saved variant json for {}: {}'.format(project_guid, e)) updated_saved_variant_guids = [] diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 5015c78f3a..859f73b9cc 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -37,10 +37,10 @@ def update_projects_saved_variant_json(projects, user_email, **kwargs): error = {} updated_variants_by_id = {} logger.info(f'Reloading saved variants in {len(projects)} projects') - for project_id, project_name, family_guids in tqdm(projects, unit=' project'): + for project_id, project_name, genome_version, family_guids in tqdm(projects, unit=' project'): try: updated_saved_variants = update_project_saved_variant_json( - project_id, user_email=user_email, 
family_guids=family_guids, **kwargs) + project_id, genome_version, user_email=user_email, family_guids=family_guids, **kwargs) if updated_saved_variants is None: skipped[project_name] = True else: @@ -66,24 +66,22 @@ def update_projects_saved_variant_json(projects, user_email, **kwargs): return updated_variants_by_id -def get_saved_variants(project_id=None, family_guids=None, dataset_type=None, genome_version=None): - saved_variants = SavedVariant.objects.all() +def get_saved_variants(genome_version, project_id=None, family_guids=None, dataset_type=None): + saved_variants = SavedVariant.objects.filter( + Q(saved_variant_json__genomeVersion__isnull=True) | + Q(saved_variant_json__genomeVersion=genome_version.replace('GRCh', '')) + ) if project_id: saved_variants = saved_variants.filter(family__project_id=project_id) if family_guids: saved_variants = saved_variants.filter(family__guid__in=family_guids) if dataset_type: saved_variants = saved_variants.filter(**saved_variants_dataset_type_filter(dataset_type)) - if genome_version: - db_genome_version = genome_version.replace('GRCh', '') - saved_variants = saved_variants.filter( - Q(saved_variant_json__genomeVersion__isnull=True) | Q(saved_variant_json__genomeVersion=db_genome_version) - ) return saved_variants -def update_project_saved_variant_json(project_id, family_guids=None, dataset_type=None, user=None, user_email=None): - saved_variants = get_saved_variants(project_id, family_guids, dataset_type).select_related('family') +def update_project_saved_variant_json(project_id, genome_version, family_guids=None, dataset_type=None, user=None, user_email=None): + saved_variants = get_saved_variants(genome_version, project_id, family_guids, dataset_type).select_related('family') if not saved_variants: return None From e5e3140954cacf9563e0f675facbf4eaa8186f60 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 31 Jul 2024 13:18:35 -0400 Subject: [PATCH 537/736] update unit tests --- 
.../check_for_new_samples_from_pipeline_tests.py | 7 ++++--- .../tests/reload_saved_variant_json_tests.py | 14 +++++++------- seqr/views/apis/saved_variant_api_tests.py | 6 +++--- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 193d7dee86..f9f5f83748 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -193,9 +193,10 @@ def test_command(self, mock_email, mock_airtable_utils): # Update fixture data to allow testing edge cases Project.objects.filter(id__in=[1, 3]).update(genome_version=38) - sv = SavedVariant.objects.get(guid='SV0000002_1248367227_r0390_100') - sv.saved_variant_json['genomeVersion'] = '38' - sv.save() + svs = SavedVariant.objects.filter(guid__in=['SV0000002_1248367227_r0390_100', 'SV0000006_1248367227_r0003_tes']) + for sv in svs: + sv.saved_variant_json['genomeVersion'] = '38' + sv.save() with self.assertRaises(ValueError) as ce: call_command('check_for_new_samples_from_pipeline', 'GRCh38/SNV_INDEL', 'auto__2023-08-08') diff --git a/seqr/management/tests/reload_saved_variant_json_tests.py b/seqr/management/tests/reload_saved_variant_json_tests.py index 04175a94fc..4ceb4314b6 100644 --- a/seqr/management/tests/reload_saved_variant_json_tests.py +++ b/seqr/management/tests/reload_saved_variant_json_tests.py @@ -27,12 +27,12 @@ def test_with_param_command(self, mock_get_variants, mock_logger): family_1 = Family.objects.get(id=1) mock_get_variants.assert_called_with( - [family_1], ['1-1562437-G-CA', '1-46859832-G-A','21-3343353-GAGA-G'], user=None, user_email='manage_command') + [family_1], ['1-46859832-G-A','21-3343353-GAGA-G'], user=None, user_email='manage_command') logger_info_calls = [ - mock.call('Updated 3 variants for project 1kg project n\xe5me with uni\xe7\xf8de'), + mock.call('Updated 2 
variants for project 1kg project n\xe5me with uni\xe7\xf8de'), mock.call('Reload Summary: '), - mock.call(' 1kg project n\xe5me with uni\xe7\xf8de: Updated 3 variants') + mock.call(' 1kg project n\xe5me with uni\xe7\xf8de: Updated 2 variants') ] mock_logger.info.assert_has_calls(logger_info_calls) mock_get_variants.reset_mock() @@ -45,7 +45,7 @@ def test_with_param_command(self, mock_get_variants, mock_logger): family_2 = Family.objects.get(id=2) mock_get_variants.assert_has_calls([ mock.call( - [family_1, family_2], ['1-1562437-G-CA', '1-248367227-TC-T', '1-46859832-G-A', '21-3343353-GAGA-G'], user=None, user_email='manage_command', + [family_1, family_2], ['1-248367227-TC-T', '1-46859832-G-A', '21-3343353-GAGA-G'], user=None, user_email='manage_command', ), mock.call([Family.objects.get(id=12)], ['1-248367227-TC-T', 'prefix_19107_DEL'], user=None, user_email='manage_command'), mock.call([Family.objects.get(id=14)], ['1-248367227-TC-T'], user=None, user_email='manage_command') @@ -53,11 +53,11 @@ def test_with_param_command(self, mock_get_variants, mock_logger): logger_info_calls = [ mock.call('Reloading saved variants in 4 projects'), - mock.call('Updated 4 variants for project 1kg project n\xe5me with uni\xe7\xf8de'), + mock.call('Updated 3 variants for project 1kg project n\xe5me with uni\xe7\xf8de'), mock.call('Updated 2 variants for project Test Reprocessed Project'), mock.call('Updated 1 variants for project Non-Analyst Project'), mock.call('Reload Summary: '), - mock.call(' 1kg project n\xe5me with uni\xe7\xf8de: Updated 4 variants'), + mock.call(' 1kg project n\xe5me with uni\xe7\xf8de: Updated 3 variants'), mock.call(' Test Reprocessed Project: Updated 2 variants'), mock.call(' Non-Analyst Project: Updated 1 variants'), mock.call('Skipped the following 1 project with no saved variants: Empty Project'), @@ -72,7 +72,7 @@ def test_with_param_command(self, mock_get_variants, mock_logger): PROJECT_GUID, '--family-guid={}'.format(FAMILY_GUID)) - 
mock_get_variants.assert_called_with([family_1], ['1-1562437-G-CA', '1-46859832-G-A', '21-3343353-GAGA-G'], user=None, user_email='manage_command') + mock_get_variants.assert_called_with([family_1], ['1-46859832-G-A', '21-3343353-GAGA-G'], user=None, user_email='manage_command') logger_info_calls = [ mock.call('Reload Summary: '), diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 7d13c1afb5..a2a09a9765 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -906,7 +906,7 @@ def test_update_compound_hets_variant_functional_data(self): self.assertEqual(response.status_code, 400) self.assertDictEqual(response.json(), {'error': 'Unable to find the following variant(s): not_variant'}) - @mock.patch('seqr.views.utils.variant_utils.MAX_VARIANTS_FETCH', 3) + @mock.patch('seqr.views.utils.variant_utils.MAX_VARIANTS_FETCH', 2) @mock.patch('seqr.utils.search.utils.es_backend_enabled') @mock.patch('seqr.views.apis.saved_variant_api.logger') @mock.patch('seqr.views.utils.variant_utils.get_variants_for_variant_ids') @@ -925,12 +925,12 @@ def test_update_saved_variant_json(self, mock_get_variants, mock_logger, mock_es self.assertDictEqual( response.json(), {'SV0000002_1248367227_r0390_100': None, 'SV0000001_2103343353_r0390_100': None, - 'SV0059957_11562437_f019313_1': None, 'SV0059956_11560662_f019313_1': None} + 'SV0059956_11560662_f019313_1': None} ) families = [Family.objects.get(guid='F000001_1'), Family.objects.get(guid='F000002_2')] mock_get_variants.assert_has_calls([ - mock.call(families, ['1-1562437-G-CA', '1-248367227-TC-T', '1-46859832-G-A'], user=self.manager_user, user_email=None), + mock.call(families, ['1-248367227-TC-T', '1-46859832-G-A'], user=self.manager_user, user_email=None), mock.call(families, ['21-3343353-GAGA-G'], user=self.manager_user, user_email=None), ]) mock_logger.error.assert_not_called() From d86935fe99dd9a48f6b0162e3ec610c0c23eafe1 Mon 
Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Aug 2024 15:10:55 -0400 Subject: [PATCH 538/736] fix error for loading context when no variants present --- seqr/views/apis/saved_variant_api_tests.py | 30 +++++++++++++++------- seqr/views/utils/variant_utils.py | 3 +++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 7d13c1afb5..3ea5f54ece 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -25,10 +25,12 @@ COMPOUND_HET_2_GUID = 'SV0059957_11562437_f019313_1' GENE_GUID_2 = 'ENSG00000197530' +VARIANT_TAG_RESPONSE_KEYS = { + 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', +} SAVED_VARIANT_RESPONSE_KEYS = { - 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', 'familiesByGuid', + *VARIANT_TAG_RESPONSE_KEYS, 'familiesByGuid', 'omimIntervals', 'genesById', 'locusListsByGuid', 'rnaSeqData', 'mmeSubmissionsByGuid', 'transcriptsById', 'phenotypeGeneScores', - 'omimIntervals', } COMPOUND_HET_3_JSON = { @@ -235,6 +237,10 @@ def test_saved_variant_data(self): # get variants with no tags for whole project response = self.client.get('{}?includeNoteVariants=true'.format(url)) self.assertEqual(response.status_code, 200) + no_families_response_keys = {*SAVED_VARIANT_RESPONSE_KEYS} + no_families_response_keys.remove('familiesByGuid') + no_families_response_keys.remove('transcriptsById') + self.assertSetEqual(set(response.json().keys()), no_families_response_keys) variants = response.json()['savedVariantsByGuid'] self.assertSetEqual(set(variants.keys()), {COMPOUND_HET_1_GUID, COMPOUND_HET_2_GUID}) self.assertListEqual(variants[COMPOUND_HET_1_GUID]['tagGuids'], []) @@ -266,10 +272,7 @@ def test_saved_variant_data(self): response = self.client.get(url.replace(PROJECT_GUID, 'R0003_test')) self.assertEqual(response.status_code, 200) 
response_json = response.json() - response_keys = {*SAVED_VARIANT_RESPONSE_KEYS} - response_keys.remove('familiesByGuid') - response_keys.remove('transcriptsById') - self.assertSetEqual(set(response_json.keys()), response_keys) + self.assertSetEqual(set(response_json.keys()), no_families_response_keys) self.assertSetEqual( set(response_json['savedVariantsByGuid'].keys()), @@ -330,6 +333,17 @@ def test_saved_variant_data(self): self.assertListEqual(variants['SV0000002_1248367227_r0390_100']['familyGuids'], ['F000002_2']) self.assertEqual(set(response_json['familiesByGuid'].keys()), {'F000001_1', 'F000002_2', 'F000012_12'}) + # Test empty project + empty_project_url = url.replace(PROJECT_GUID, 'R0002_empty') + response = self.client.get(empty_project_url) + self.assertEqual(response.status_code, 200) + empty_response = {k: {} for k in VARIANT_TAG_RESPONSE_KEYS} + self.assertDictEqual(response.json(), empty_response) + + response = self.client.get(f'{empty_project_url}?loadProjectTagTypes=true&loadFamilyContext=true') + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), empty_response) + def test_create_saved_variant(self): create_saved_variant_url = reverse(create_saved_variant_handler) self.check_collaborator_login(create_saved_variant_url, request_data={'familyGuid': 'F000001_1'}) @@ -410,9 +424,7 @@ def test_create_saved_sv_variant(self): self.assertEqual(response.status_code, 200) response_json = response.json() - self.assertSetEqual(set(response_json.keys()), { - 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', 'genesById', - }) + self.assertSetEqual(set(response_json.keys()), {*VARIANT_TAG_RESPONSE_KEYS, 'genesById'}) self.assertEqual(len(response_json['savedVariantsByGuid']), 1) variant_guid = next(iter(response_json['savedVariantsByGuid'])) diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 17d1f7e36a..7f98e14653 100644 --- 
a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -374,6 +374,9 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a if saved_variants is not None else {'savedVariantsByGuid': {}} variants = list(response['savedVariantsByGuid'].values()) if response_variants is None else response_variants + if not variants: + return response + genes, transcripts, family_genes = _saved_variant_genes_transcripts(variants) projects = Project.objects.filter(family__guid__in=family_genes.keys()).distinct() From b3ebaec088dd9ddffe26023a378f2749b7cf2c82 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Aug 2024 15:25:09 -0400 Subject: [PATCH 539/736] fix unit tests --- seqr/views/apis/summary_data_api_tests.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 8d90812dbf..cc80bc86fb 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -26,10 +26,12 @@ u'dateGenerated': '2020-04-27' } +VARIANT_TAG_RESPONSE_KEYS = { + 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', +} SAVED_VARIANT_RESPONSE_KEYS = { - 'projectsByGuid', 'locusListsByGuid', 'savedVariantsByGuid', 'variantFunctionalDataByGuid', 'genesById', - 'variantNotesByGuid', 'individualsByGuid', 'variantTagsByGuid', 'familiesByGuid', 'familyNotesByGuid', - 'mmeSubmissionsByGuid', 'transcriptsById', + *VARIANT_TAG_RESPONSE_KEYS, 'projectsByGuid', 'locusListsByGuid', 'genesById', + 'individualsByGuid', 'familiesByGuid', 'familyNotesByGuid', 'mmeSubmissionsByGuid', 'transcriptsById', } EXPECTED_NO_AIRTABLE_SAMPLE_METADATA_ROW = { @@ -317,7 +319,7 @@ def test_saved_variants_page(self): response = self.client.get('{}?gene=ENSG00000135953'.format(url)) self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {k: {} for k in 
SAVED_VARIANT_RESPONSE_KEYS if k != 'transcriptsById'}) + self.assertDictEqual(response.json(), {k: {} for k in VARIANT_TAG_RESPONSE_KEYS}) self.login_manager() response = self.client.get(url) From a18bf966e14b8352ee6ef50bb88132aae34c419b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Aug 2024 16:00:37 -0400 Subject: [PATCH 540/736] fix unit tests --- seqr/views/apis/saved_variant_api_tests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 3ea5f54ece..e70c410aa7 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -1011,8 +1011,10 @@ def test_saved_variant_data(self, *args): super(AnvilSavedVariantAPITest, self).test_saved_variant_data(*args) self.mock_list_workspaces.assert_called_with(self.analyst_user) self.mock_get_ws_access_level.assert_called_with( + mock.ANY, 'my-seqr-billing', 'empty') + self.mock_get_ws_access_level.assert_any_call( mock.ANY, 'my-seqr-billing', 'anvil-1kg project n\u00e5me with uni\u00e7\u00f8de') - self.assertEqual(self.mock_get_ws_access_level.call_count, 15) + self.assertEqual(self.mock_get_ws_access_level.call_count, 17) self.mock_get_groups.assert_has_calls([mock.call(self.collaborator_user), mock.call(self.analyst_user)]) self.assertEqual(self.mock_get_groups.call_count, 11) self.mock_get_ws_acl.assert_not_called() From f5c9b02fafa33e2c7fb67067540b502ef96c56cb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Aug 2024 13:35:53 -0400 Subject: [PATCH 541/736] bump changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0184a538f..e730f2820d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # _seqr_ Changes ## dev + +## 8/2/24 * Adds index_file_path to IGV Sample model (REQUIRES DB MIGRATION) ## 7/24/24 From 0d7dba43cfb0d76e55f655d06cb0305e876b8ab5 Mon Sep 17 00:00:00 2001 From: Hana Snow 
Date: Fri, 2 Aug 2024 13:49:08 -0400 Subject: [PATCH 542/736] fix flapping test --- seqr/views/apis/anvil_workspace_api_tests.py | 2 +- seqr/views/utils/pedigree_info_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index d8bdf7e9e7..2265fb7507 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -813,7 +813,7 @@ def _assert_valid_operation(self, project, test_add_data=True): self.assertIn({ 'family__family_id': '1', 'individual_id': 'NA19675_1', 'mother__individual_id': None, 'father__individual_id': 'NA19678', 'sex': 'F', 'affected': 'A', 'notes': 'A affected individual, test1-zsf', - 'features': [{'id': 'HP:0012469'}, {'id': 'HP:0011675'}], + 'features': [{'id': 'HP:0011675'}, {'id': 'HP:0012469'}], }, individual_model_data) self.assertIn({ 'family__family_id': '1', 'individual_id': 'NA19678', 'mother__individual_id': None, diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py index fffe0426c7..91b74f8566 100644 --- a/seqr/views/utils/pedigree_info_utils.py +++ b/seqr/views/utils/pedigree_info_utils.py @@ -151,7 +151,7 @@ def parse_hpo_terms(hpo_term_string): if not hpo_term_string: return [] terms = {hpo_term.strip() for hpo_term in re.sub(r'\(.*?\)', '', hpo_term_string).replace(',', ';').split(';')} - return[{'id': term} for term in terms if term] + return[{'id': term} for term in sorted(terms) if term] def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None, update_features=False): From 640ee46ad86642b384ae4afeea9108f42cece5d4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Aug 2024 14:07:57 -0400 Subject: [PATCH 543/736] update to latest docker compose syntax --- deploy/LOCAL_DEVELOPMENT_INSTALL.md | 2 +- deploy/LOCAL_INSTALL.md | 36 ++++++++++++++--------------- test_local_deployment.sh | 20 ++++++++-------- 3 
files changed, 29 insertions(+), 29 deletions(-) diff --git a/deploy/LOCAL_DEVELOPMENT_INSTALL.md b/deploy/LOCAL_DEVELOPMENT_INSTALL.md index 919a0acc9b..02e34f8859 100644 --- a/deploy/LOCAL_DEVELOPMENT_INSTALL.md +++ b/deploy/LOCAL_DEVELOPMENT_INSTALL.md @@ -116,7 +116,7 @@ Before running seqr, make sure the following are currently running/ started: - If you want ES running but do not need production data/ are working with a standalone seqr instance, use docker-compose ```bash - docker-compose up elasticsearch + docker compose up elasticsearch ``` ### Run ui asset server diff --git a/deploy/LOCAL_INSTALL.md b/deploy/LOCAL_INSTALL.md index 6ed9162d40..d766931d45 100644 --- a/deploy/LOCAL_INSTALL.md +++ b/deploy/LOCAL_INSTALL.md @@ -31,10 +31,10 @@ SEQR_DIR=$(pwd) wget https://raw.githubusercontent.com/broadinstitute/seqr/master/docker-compose.yml -docker-compose up -d seqr # start up the seqr docker image in the background after also starting other components it depends on (postgres, redis, elasticsearch). This may take 10+ minutes. -docker-compose logs -f seqr # (optional) continuously print seqr logs to see when it is done starting up or if there are any errors. Type Ctrl-C to exit from the logs. +docker compose up -d seqr # start up the seqr docker image in the background after also starting other components it depends on (postgres, redis, elasticsearch). This may take 10+ minutes. +docker compose logs -f seqr # (optional) continuously print seqr logs to see when it is done starting up or if there are any errors. Type Ctrl-C to exit from the logs. -docker-compose exec seqr python manage.py createsuperuser # create a seqr Admin user +docker compose exec seqr python manage.py createsuperuser # create a seqr Admin user open http://localhost # open the seqr landing page in your browser. 
Log in to seqr using the email and password from the previous step ``` @@ -45,15 +45,15 @@ Updating your local installation of seqr involves pulling the latest version of ```bash # run this from the directory containing your docker-compose.yml file -docker-compose pull -docker-compose up -d seqr +docker compose pull +docker compose up -d seqr -docker-compose logs -f seqr # (optional) continuously print seqr logs to see when it is done starting up or if there are any errors. Type Ctrl-C to exit from the logs. +docker compose logs -f seqr # (optional) continuously print seqr logs to see when it is done starting up or if there are any errors. Type Ctrl-C to exit from the logs. ``` To update reference data in seqr, such as OMIM, HPO, etc., run the following ```bash -docker-compose exec seqr ./manage.py update_all_reference_data --use-cached-omim --skip-gencode +docker compose exec seqr ./manage.py update_all_reference_data --use-cached-omim --skip-gencode ``` ### Annotating and loading VCF callsets @@ -79,7 +79,7 @@ The steps below describe how to annotate a callset and then load it into your on 1. start a pipeline-runner container which has the necessary tools and environment for starting and submitting jobs to a Dataproc cluster. ```bash - docker-compose up -d pipeline-runner # start the pipeline-runner container + docker compose up -d pipeline-runner # start the pipeline-runner container ``` 1. if you haven't already, upload reference data to your own google bucket. @@ -88,7 +88,7 @@ This is expected to take a while ```bash BUILD_VERSION=38 # can be 37 or 38 - docker-compose exec pipeline-runner copy_reference_data_to_gs.sh $BUILD_VERSION $GS_BUCKET + docker compose exec pipeline-runner copy_reference_data_to_gs.sh $BUILD_VERSION $GS_BUCKET ``` Periodically, you may want to update the reference data in order to get the latest versions of these annotations. 
@@ -115,7 +115,7 @@ annotations, but you will need to re-load previously loaded projects to get the INPUT_FILE_PATH=/${GS_FILE_PATH}/${FILENAME} - docker-compose exec pipeline-runner load_data_dataproc.sh $BUILD_VERSION $SAMPLE_TYPE $INDEX_NAME $GS_BUCKET $INPUT_FILE_PATH + docker compose exec pipeline-runner load_data_dataproc.sh $BUILD_VERSION $SAMPLE_TYPE $INDEX_NAME $GS_BUCKET $INPUT_FILE_PATH ``` @@ -138,13 +138,13 @@ The steps below describe how to annotate a callset and then load it into your on 1. start a pipeline-runner container ```bash - docker-compose up -d pipeline-runner # start the pipeline-runner container + docker compose up -d pipeline-runner # start the pipeline-runner container ``` 1. authenticate into your google cloud account. This is required for hail to access buckets hosted on gcloud. ```bash - docker-compose exec pipeline-runner gcloud auth application-default login + docker compose exec pipeline-runner gcloud auth application-default login ``` 1. if you haven't already, download VEP and other reference data to the docker image's mounted directories. @@ -153,7 +153,7 @@ This is expected to take a while ```bash BUILD_VERSION=38 # can be 37 or 38 - docker-compose exec pipeline-runner download_reference_data.sh $BUILD_VERSION + docker compose exec pipeline-runner download_reference_data.sh $BUILD_VERSION ``` Periodically, you may want to update the reference data in order to get the latest versions of these annotations. 
@@ -163,12 +163,12 @@ annotations, but you will need to re-load previously loaded projects to get the BUILD_VERSION=38 # can be 37 or 38 # Update clinvar - docker-compose exec pipeline-runner rm -rf "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht" - docker-compose exec pipeline-runner gsutil rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/clinvar/clinvar.GRCh${BUILD_VERSION}.ht" "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht" + docker compose exec pipeline-runner rm -rf "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht" + docker compose exec pipeline-runner gsutil rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/clinvar/clinvar.GRCh${BUILD_VERSION}.ht" "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht" # Update all other reference data - docker-compose exec pipeline-runner rm -rf "/seqr-reference-data/GRCh${BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht" - docker-compose exec pipeline-runner gsutil rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/all_reference_data/combined_reference_data_grch${BUILD_VERSION}.ht" "/seqr-reference-data/GRCh${BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht" + docker compose exec pipeline-runner rm -rf "/seqr-reference-data/GRCh${BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht" + docker compose exec pipeline-runner gsutil rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/all_reference_data/combined_reference_data_grch${BUILD_VERSION}.ht" "/seqr-reference-data/GRCh${BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht" ``` 1. run the loading command in the pipeline-runner container. 
Adjust the arguments as needed @@ -179,7 +179,7 @@ annotations, but you will need to re-load previously loaded projects to get the INPUT_FILE_PATH=${FILE_PATH}/${FILENAME} - docker-compose exec pipeline-runner load_data.sh $BUILD_VERSION $SAMPLE_TYPE $INDEX_NAME $INPUT_FILE_PATH + docker compose exec pipeline-runner load_data.sh $BUILD_VERSION $SAMPLE_TYPE $INDEX_NAME $INPUT_FILE_PATH ``` diff --git a/test_local_deployment.sh b/test_local_deployment.sh index e6d38d908f..b964a2e235 100755 --- a/test_local_deployment.sh +++ b/test_local_deployment.sh @@ -3,15 +3,15 @@ set -ex # Due to travis filesystem issues, need to explicitly grant permissions for the volume mount from the container -# This is not required to use docker-compose locally, only for testing -docker-compose up -d elasticsearch -docker-compose exec -T elasticsearch chmod 777 ./data +# This is not required to use docker compose locally, only for testing +docker compose up -d elasticsearch +docker compose exec -T elasticsearch chmod 777 ./data -docker-compose up -d seqr -docker-compose logs postgres -docker-compose logs elasticsearch -docker-compose logs redis -docker-compose exec -T seqr curl elasticsearch:9200 +docker compose up -d seqr +docker compose logs postgres +docker compose logs elasticsearch +docker compose logs redis +docker compose exec -T seqr curl elasticsearch:9200 sleep 30 -docker-compose logs seqr -echo -ne 'testpassword\n' docker-compose exec -T seqr python manage.py createsuperuser --username test --email test@test.com +docker compose logs seqr +echo -ne 'testpassword\n' docker compose exec -T seqr python manage.py createsuperuser --username test --email test@test.com From 15a153ae4da523efd3fa8923a97c2b1815ee7b15 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 2 Aug 2024 15:08:20 -0400 Subject: [PATCH 544/736] use family and project tables in sample_type directory --- .../{ => WES}/F000002_2.ht/.README.txt.crc | Bin .../{ => WES}/F000002_2.ht/._SUCCESS.crc | Bin 
.../F000002_2.ht/.metadata.json.gz.crc | Bin .../{ => WES}/F000002_2.ht/README.txt | 0 .../families/{ => WES}/F000002_2.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../F000002_2.ht/globals/metadata.json.gz | Bin .../F000002_2.ht/globals/parts/.part-0.crc | Bin .../F000002_2.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/F000002_2.ht/metadata.json.gz | Bin .../F000002_2.ht/rows/.metadata.json.gz.crc | Bin .../F000002_2.ht/rows/metadata.json.gz | Bin ...0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc | Bin ...art-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce | Bin .../{ => WES}/R0001_1kg.ht/.README.txt.crc | Bin .../{ => WES}/R0001_1kg.ht/._SUCCESS.crc | Bin .../R0001_1kg.ht/.metadata.json.gz.crc | Bin .../{ => WES}/R0001_1kg.ht/README.txt | 0 .../projects/{ => WES}/R0001_1kg.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../R0001_1kg.ht/globals/metadata.json.gz | Bin .../R0001_1kg.ht/globals/parts/.part-0.crc | Bin .../R0001_1kg.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/R0001_1kg.ht/metadata.json.gz | Bin .../R0001_1kg.ht/rows/.metadata.json.gz.crc | Bin .../R0001_1kg.ht/rows/metadata.json.gz | Bin ...0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc | Bin ...art-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f | Bin .../{ => WES}/F000002_2.ht/.README.txt.crc | Bin .../{ => WES}/F000002_2.ht/._SUCCESS.crc | Bin .../F000002_2.ht/.metadata.json.gz.crc | Bin .../{ => WES}/F000002_2.ht/README.txt | 0 .../families/{ => WES}/F000002_2.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../F000002_2.ht/globals/metadata.json.gz | Bin .../F000002_2.ht/globals/parts/.part-0.crc | Bin .../F000002_2.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/F000002_2.ht/metadata.json.gz | Bin 
.../F000002_2.ht/rows/.metadata.json.gz.crc | Bin .../F000002_2.ht/rows/metadata.json.gz | Bin ...0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.crc | Bin ...art-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8 | Bin .../{ => WES}/R0001_1kg.ht/.README.txt.crc | Bin .../{ => WES}/R0001_1kg.ht/._SUCCESS.crc | Bin .../R0001_1kg.ht/.metadata.json.gz.crc | Bin .../{ => WES}/R0001_1kg.ht/README.txt | 0 .../projects/{ => WES}/R0001_1kg.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../R0001_1kg.ht/globals/metadata.json.gz | Bin .../R0001_1kg.ht/globals/parts/.part-0.crc | Bin .../R0001_1kg.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/R0001_1kg.ht/metadata.json.gz | Bin .../R0001_1kg.ht/rows/.metadata.json.gz.crc | Bin .../R0001_1kg.ht/rows/metadata.json.gz | Bin ...0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.crc | Bin ...art-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06 | Bin .../{ => WES}/F000002_2.ht/.README.txt.crc | Bin .../{ => WES}/F000002_2.ht/._SUCCESS.crc | Bin .../F000002_2.ht/.metadata.json.gz.crc | Bin .../{ => WES}/F000002_2.ht/README.txt | 0 .../families/{ => WES}/F000002_2.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../F000002_2.ht/globals/metadata.json.gz | Bin .../F000002_2.ht/globals/parts/.part-0.crc | Bin .../F000002_2.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/F000002_2.ht/metadata.json.gz | Bin .../F000002_2.ht/rows/.metadata.json.gz.crc | Bin .../F000002_2.ht/rows/metadata.json.gz | Bin ...0-5b60e665-6a2b-43ec-b282-1003ad80e87c.crc | Bin ...art-0-5b60e665-6a2b-43ec-b282-1003ad80e87c | Bin .../{ => WES}/R0001_1kg.ht/.README.txt.crc | Bin .../{ => WES}/R0001_1kg.ht/._SUCCESS.crc | Bin .../R0001_1kg.ht/.metadata.json.gz.crc | Bin .../{ => WES}/R0001_1kg.ht/README.txt | 0 .../projects/{ => WES}/R0001_1kg.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin 
.../R0001_1kg.ht/globals/metadata.json.gz | Bin .../R0001_1kg.ht/globals/parts/.part-0.crc | Bin .../R0001_1kg.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/R0001_1kg.ht/metadata.json.gz | Bin .../R0001_1kg.ht/rows/.metadata.json.gz.crc | Bin .../R0001_1kg.ht/rows/metadata.json.gz | Bin ...0-ad3760b2-5a76-4b94-9268-9673bf62e956.crc | Bin ...art-0-ad3760b2-5a76-4b94-9268-9673bf62e956 | Bin .../{ => WGS}/R0003_test.ht/.README.txt.crc | Bin .../{ => WGS}/R0003_test.ht/._SUCCESS.crc | Bin .../R0003_test.ht/.metadata.json.gz.crc | Bin .../{ => WGS}/R0003_test.ht/README.txt | 0 .../projects/{ => WGS}/R0003_test.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../R0003_test.ht/globals/metadata.json.gz | Bin .../R0003_test.ht/globals/parts/.part-0.crc | Bin .../R0003_test.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WGS}/R0003_test.ht/metadata.json.gz | Bin .../R0003_test.ht/rows/.metadata.json.gz.crc | Bin .../R0003_test.ht/rows/metadata.json.gz | Bin ...0-28a643dd-8eb0-4510-8718-6e98b4f4274d.crc | Bin ...art-0-28a643dd-8eb0-4510-8718-6e98b4f4274d | Bin .../{ => WES}/F000002_2.ht/.README.txt.crc | Bin .../{ => WES}/F000002_2.ht/._SUCCESS.crc | Bin .../F000002_2.ht/.metadata.json.gz.crc | Bin .../{ => WES}/F000002_2.ht/README.txt | 0 .../families/{ => WES}/F000002_2.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../F000002_2.ht/globals/metadata.json.gz | Bin .../F000002_2.ht/globals/parts/.part-0.crc | Bin .../F000002_2.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/F000002_2.ht/metadata.json.gz | Bin .../F000002_2.ht/rows/.metadata.json.gz.crc | Bin .../F000002_2.ht/rows/metadata.json.gz | Bin ...0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc | Bin 
...8-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48 | Bin .../{ => WES}/R0001_1kg.ht/.README.txt.crc | Bin .../{ => WES}/R0001_1kg.ht/._SUCCESS.crc | Bin .../R0001_1kg.ht/.metadata.json.gz.crc | Bin .../{ => WES}/R0001_1kg.ht/README.txt | 0 .../projects/{ => WES}/R0001_1kg.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../R0001_1kg.ht/globals/metadata.json.gz | Bin .../R0001_1kg.ht/globals/parts/.part-0.crc | Bin .../R0001_1kg.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/R0001_1kg.ht/metadata.json.gz | Bin .../R0001_1kg.ht/rows/.metadata.json.gz.crc | Bin .../R0001_1kg.ht/rows/metadata.json.gz | Bin ...0-4bb6b390-07db-405c-abad-c57b5aa95da0.crc | Bin ...art-0-4bb6b390-07db-405c-abad-c57b5aa95da0 | Bin .../{ => WGS}/F000011_11.ht/.README.txt.crc | Bin .../{ => WGS}/F000011_11.ht/._SUCCESS.crc | Bin .../F000011_11.ht/.metadata.json.gz.crc | Bin .../{ => WGS}/F000011_11.ht/README.txt | 0 .../families/{ => WGS}/F000011_11.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../F000011_11.ht/globals/metadata.json.gz | Bin .../F000011_11.ht/globals/parts/.part-0.crc | Bin .../F000011_11.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WGS}/F000011_11.ht/metadata.json.gz | Bin .../F000011_11.ht/rows/.metadata.json.gz.crc | Bin .../F000011_11.ht/rows/metadata.json.gz | Bin ...0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc | Bin ...8-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 | Bin .../{ => WGS}/R0003_test.ht/.README.txt.crc | Bin .../{ => WGS}/R0003_test.ht/._SUCCESS.crc | Bin .../R0003_test.ht/.metadata.json.gz.crc | Bin .../{ => WGS}/R0003_test.ht/README.txt | 0 .../projects/{ => WGS}/R0003_test.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../R0003_test.ht/globals/metadata.json.gz | Bin .../R0003_test.ht/globals/parts/.part-0.crc | Bin .../R0003_test.ht/globals/parts/part-0 | Bin 
.../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WGS}/R0003_test.ht/metadata.json.gz | Bin .../R0003_test.ht/rows/.metadata.json.gz.crc | Bin .../R0003_test.ht/rows/metadata.json.gz | Bin ...0-cbf84037-3354-427a-98a6-b953711ae5bc.crc | Bin ...art-0-cbf84037-3354-427a-98a6-b953711ae5bc | Bin hail_search/queries/base.py | 9 +- hail_search/test_search.py | 124 +++++++++--------- hail_search/test_utils.py | 34 ++--- seqr/utils/search/hail_search_utils.py | 19 ++- seqr/utils/search/hail_search_utils_tests.py | 2 +- 203 files changed, 98 insertions(+), 90 deletions(-) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/README.txt (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.index.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => 
WES}/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/index (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/parts/.part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/parts/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/README.txt (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.index.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => 
WES}/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/index (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/rows/parts/.part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/rows/parts/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => 
WES}/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/index (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/rows/parts/.part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.crc (100%) rename hail_search/fixtures/GRCh38/MITO/families/{ => WES}/F000002_2.ht/rows/parts/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8 (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/globals/parts/part-0 (100%) 
rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/index (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/rows/parts/.part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.crc (100%) rename hail_search/fixtures/GRCh38/MITO/projects/{ => WES}/R0001_1kg.ht/rows/parts/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06 (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/parts/.part-0.crc (100%) rename 
hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/index (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/parts/.part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/parts/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => 
WES}/R0001_1kg.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/index (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/rows/parts/.part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WES}/R0001_1kg.ht/rows/parts/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956 (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/_SUCCESS (100%) rename 
hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/index (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/rows/parts/.part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/projects/{ => WGS}/R0003_test.ht/rows/parts/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/.metadata.json.gz.crc (100%) rename 
hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/families/{ => WES}/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48 (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => 
WES}/R0001_1kg.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/index (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/rows/parts/.part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.crc (100%) rename hail_search/fixtures/GRCh38/SV_WES/projects/{ => WES}/R0001_1kg.ht/rows/parts/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0 (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => 
WGS}/F000011_11.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/index (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/rows/parts/.part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc (100%) rename 
hail_search/fixtures/GRCh38/SV_WGS/families/{ => WGS}/F000011_11.ht/rows/parts/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/rows/metadata.json.gz (100%) rename 
hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc (100%) rename hail_search/fixtures/GRCh38/SV_WGS/projects/{ => WGS}/R0003_test.ht/rows/parts/part-0-cbf84037-3354-427a-98a6-b953711ae5bc (100%) diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/.README.txt.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/README.txt rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/README.txt diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/_SUCCESS b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/_SUCCESS similarity index 100% rename from 
hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/_SUCCESS rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.index.crc 
b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.index.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.index.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/index similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/index rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/index diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/metadata.json.gz similarity index 100% rename from 
hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/rows/parts/.part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc rename to 
hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/rows/parts/.part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce b/hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/rows/parts/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce rename to hail_search/fixtures/GRCh37/SNV_INDEL/families/WES/F000002_2.ht/rows/parts/part-0-061e05c1-cd1f-474b-b8f9-472cb004e4ce diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/.README.txt.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/README.txt similarity index 100% rename from 
hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/README.txt rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/README.txt diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/_SUCCESS b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/_SUCCESS rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/parts/part-0 similarity index 100% rename from 
hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.index.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.index.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/index similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/index rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/index diff --git 
a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc 
b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/parts/.part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/parts/.part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/parts/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f rename to hail_search/fixtures/GRCh37/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/parts/part-0-cb4462be-ab9b-4fa8-bc1f-ea8fab6fdf0f diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/.metadata.json.gz.crc rename to 
hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/README.txt b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/README.txt rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/_SUCCESS b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/globals/parts/.part-0.crc diff --git 
a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.index.crc b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.index.crc rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/index b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/index rename to 
hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/index diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/index/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/rows/parts/.part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.crc 
b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/rows/parts/.part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/rows/parts/.part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.crc rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/rows/parts/.part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8.crc diff --git a/hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/rows/parts/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8 b/hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/rows/parts/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8 similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/families/F000002_2.ht/rows/parts/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8 rename to hail_search/fixtures/GRCh38/MITO/families/WES/F000002_2.ht/rows/parts/part-0-1db7379b-e75c-4ed2-b79b-28ffb9b115e8 diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/.metadata.json.gz.crc diff --git 
a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/README.txt b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/README.txt rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/_SUCCESS b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/globals/parts/part-0 
similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.index.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.index.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/index b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/index rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/index diff --git 
a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/index/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/rows/parts/.part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.crc b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/rows/parts/.part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.crc similarity index 100% rename from 
hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/rows/parts/.part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.crc rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/rows/parts/.part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06.crc diff --git a/hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/rows/parts/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06 b/hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/rows/parts/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06 similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/projects/R0001_1kg.ht/rows/parts/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06 rename to hail_search/fixtures/GRCh38/MITO/projects/WES/R0001_1kg.ht/rows/parts/part-0-9a202b26-a04d-4337-9aa5-bbab41b4bc06 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/README.txt 
b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/README.txt rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/globals/parts/part-0 
b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.index.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/index rename to 
hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/index diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/index/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/rows/metadata.json.gz diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/rows/parts/.part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/rows/parts/.part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/rows/parts/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WES/F000002_2.ht/rows/parts/part-0-5b60e665-6a2b-43ec-b282-1003ad80e87c diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/.metadata.json.gz.crc similarity index 
100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/README.txt rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/parts/.part-0.crc similarity index 100% rename from 
hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.index.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/index 
b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/index rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/index diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/index/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/metadata.json.gz similarity index 100% 
rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/parts/.part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/.part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/parts/.part-0-ad3760b2-5a76-4b94-9268-9673bf62e956.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956 b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/parts/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956 similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0001_1kg.ht/rows/parts/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956 rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0001_1kg.ht/rows/parts/part-0-ad3760b2-5a76-4b94-9268-9673bf62e956 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/._SUCCESS.crc rename to 
hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/README.txt rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/globals/metadata.json.gz rename to 
hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.index.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.metadata.json.gz.crc rename to 
hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/index rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/index diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/index/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/rows/.metadata.json.gz.crc rename to 
hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/rows/parts/.part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/rows/parts/.part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/rows/parts/.part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/rows/parts/.part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/rows/parts/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/rows/parts/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/projects/R0003_test.ht/rows/parts/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WGS/R0003_test.ht/rows/parts/part-0-28a643dd-8eb0-4510-8718-6e98b4f4274d diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/.README.txt.crc diff --git 
a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/README.txt rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/globals/metadata.json.gz 
similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc similarity index 100% rename from 
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/.metadata.json.gz.crc 
b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48 b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48 similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48 rename to hail_search/fixtures/GRCh38/SV_WES/families/WES/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/.README.txt.crc similarity index 100% rename 
from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/globals/.metadata.json.gz.crc diff --git 
a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.index.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.metadata.json.gz.crc 
b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/index b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/index rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/index diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/index/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/metadata.json.gz diff --git 
a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/rows/parts/.part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.crc rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/rows/parts/.part-0-4bb6b390-07db-405c-abad-c57b5aa95da0.crc diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0 b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/rows/parts/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0 similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0 rename to hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0001_1kg.ht/rows/parts/part-0-4bb6b390-07db-405c-abad-c57b5aa95da0 diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/.README.txt.crc 
similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/README.txt rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/globals/.metadata.json.gz.crc rename to 
hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.index.crc rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.index.crc diff --git 
a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/index b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/index rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/index diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/metadata.json.gz similarity index 
100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/rows/parts/.part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/rows/parts/.part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/rows/parts/.part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc rename to hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/rows/parts/.part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/rows/parts/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 b/hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/rows/parts/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/families/F000011_11.ht/rows/parts/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 rename to 
hail_search/fixtures/GRCh38/SV_WGS/families/WGS/F000011_11.ht/rows/parts/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/README.txt rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/_SUCCESS diff --git 
a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/globals/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc similarity index 100% rename from 
hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/metadata.json.gz diff --git 
a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/rows/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc diff --git a/hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/rows/parts/part-0-cbf84037-3354-427a-98a6-b953711ae5bc b/hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/rows/parts/part-0-cbf84037-3354-427a-98a6-b953711ae5bc similarity index 100% rename from 
hail_search/fixtures/GRCh38/SV_WGS/projects/R0003_test.ht/rows/parts/part-0-cbf84037-3354-427a-98a6-b953711ae5bc rename to hail_search/fixtures/GRCh38/SV_WGS/projects/WGS/R0003_test.ht/rows/parts/part-0-cbf84037-3354-427a-98a6-b953711ae5bc diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index fb8565dcde..99ca936ac6 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -296,7 +296,8 @@ def _parse_sample_data(self, sample_data): def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_partitions=MAX_PARTITIONS, **kwargs): if len(project_samples) == 1: project_guid = list(project_samples.keys())[0] - project_ht = self._read_table(f'projects/{project_guid}.ht', use_ssd_dir=True) + sample_type = list(project_samples[project_guid].values())[0][0]['sample_type'] + project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) return self._filter_entries_table(project_ht, project_samples[project_guid], **kwargs) # Need to chunk tables or else evaluating table globals throws LineTooLong exception @@ -308,8 +309,9 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): + sample_type = list(project_sample_data.values())[0][0]['sample_type'] project_ht = self._read_table( - f'projects/{project_guid}.ht', + f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True, skip_missing_field='family_entries' if skip_all_missing else None, ) @@ -338,7 +340,8 @@ def import_filtered_table(self, project_samples, num_families, intervals=None, * if num_families == 1: family_sample_data = list(project_samples.values())[0] family_guid = list(family_sample_data.keys())[0] - family_ht = self._read_table(f'families/{family_guid}.ht', use_ssd_dir=True) + sample_type = family_sample_data[family_guid][0]['sample_type'] + family_ht = 
self._read_table(f'families/{sample_type}/{family_guid}.ht', use_ssd_dir=True) family_ht = family_ht.transmute(family_entries=[family_ht.entries]) family_ht = family_ht.annotate_globals( family_guids=[family_guid], family_samples={family_guid: family_ht.sample_ids}, diff --git a/hail_search/test_search.py b/hail_search/test_search.py index d42883b674..5363a6c115 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -254,7 +254,7 @@ async def test_single_family_search(self): ) await self._assert_expected_search( - [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], omit_sample_type='SNV_INDEL', gene_counts=GCNV_GENE_COUNTS, + [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], omit_data_type='SNV_INDEL', gene_counts=GCNV_GENE_COUNTS, ) await self._assert_expected_search( @@ -363,7 +363,7 @@ async def test_single_project_search(self): 'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}}, } await self._assert_expected_search( - [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', gene_counts=variant_gene_counts, + [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_data_type='SV_WES', gene_counts=variant_gene_counts, ) await self._assert_expected_search( @@ -411,7 +411,7 @@ async def test_inheritance_filter(self): ) await self._assert_expected_search( - [GCNV_VARIANT3], inheritance_mode=inheritance_mode, annotations=NEW_SV_FILTER, omit_sample_type='SNV_INDEL', + [GCNV_VARIANT3], inheritance_mode=inheritance_mode, annotations=NEW_SV_FILTER, omit_data_type='SNV_INDEL', ) await self._assert_expected_search( @@ -455,7 +455,7 @@ async def test_inheritance_filter(self): ) await self._assert_expected_search( - [[GCNV_VARIANT3, GCNV_VARIANT4]], inheritance_mode=inheritance_mode, omit_sample_type='SNV_INDEL', gene_counts={ + [[GCNV_VARIANT3, GCNV_VARIANT4]], inheritance_mode=inheritance_mode, omit_data_type='SNV_INDEL', gene_counts={ 'ENSG00000275023': {'total': 2, 'families': {'F000002_2': 
2}}, 'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}}, 'ENSG00000277972': {'total': 1, 'families': {'F000002_2': 1}}, @@ -497,7 +497,7 @@ async def test_inheritance_filter(self): ) await self._assert_expected_search( - [GCNV_VARIANT3, [GCNV_VARIANT3, GCNV_VARIANT4]], inheritance_mode=inheritance_mode, omit_sample_type='SNV_INDEL', gene_counts={ + [GCNV_VARIANT3, [GCNV_VARIANT3, GCNV_VARIANT4]], inheritance_mode=inheritance_mode, omit_data_type='SNV_INDEL', gene_counts={ 'ENSG00000275023': {'total': 3, 'families': {'F000002_2': 3}}, 'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}}, 'ENSG00000277972': {'total': 1, 'families': {'F000002_2': 1}}, @@ -545,7 +545,7 @@ async def test_quality_filter(self): ) await self._assert_expected_search( - [], annotations=NEW_SV_FILTER, quality_filter=gcnv_quality_filter, omit_sample_type='SNV_INDEL', + [], annotations=NEW_SV_FILTER, quality_filter=gcnv_quality_filter, omit_data_type='SNV_INDEL', ) sv_quality_filter = {'min_gq_sv': 40} @@ -558,7 +558,7 @@ async def test_quality_filter(self): ) await self._assert_expected_search( - [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40, 'vcf_filter': 'pass'}, omit_sample_type='SV_WES', + [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40, 'vcf_filter': 'pass'}, omit_data_type='SV_WES', ) await self._assert_expected_search( @@ -571,34 +571,34 @@ async def test_quality_filter(self): ) await self._assert_expected_search( - [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter={'min_ab': 50}, omit_sample_type='SV_WES', + [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter={'min_ab': 50}, omit_data_type='SV_WES', ) await self._assert_expected_search( [VARIANT2, VARIANT3], quality_filter={'min_ab': 70, 'affected_only': True}, - omit_sample_type='SV_WES', + omit_data_type='SV_WES', ) quality_filter.update({'min_gq': 40, 'min_ab': 50}) await self._assert_expected_search( - [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, 
omit_sample_type='SV_WES', + [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_data_type='SV_WES', ) annotations = {'splice_ai': '0.0'} # Ensures no variants are filtered out by annotation/path filters await self._assert_expected_search( - [VARIANT1, VARIANT2, FAMILY_3_VARIANT, MITO_VARIANT1, MITO_VARIANT3], quality_filter=quality_filter, omit_sample_type='SV_WES', + [VARIANT1, VARIANT2, FAMILY_3_VARIANT, MITO_VARIANT1, MITO_VARIANT3], quality_filter=quality_filter, omit_data_type='SV_WES', annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']}, sample_data={**EXPECTED_SAMPLE_DATA, **FAMILY_2_MITO_SAMPLE_DATA}, ) await self._assert_expected_search( - [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES', + [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_data_type='SV_WES', annotations=annotations, pathogenicity={'clinvar': ['pathogenic']}, ) async def test_location_search(self): await self._assert_expected_search( - [MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', **LOCATION_SEARCH, + [MULTI_FAMILY_VARIANT, VARIANT4], omit_data_type='SV_WES', **LOCATION_SEARCH, ) await self._assert_expected_search( @@ -606,7 +606,7 @@ async def test_location_search(self): sv_intervals = ['1:9310023-9380264', '17:38717636-38724781'] await self._assert_expected_search( - [GCNV_VARIANT3, GCNV_VARIANT4], intervals=sv_intervals, gene_ids=['ENSG00000275023'], omit_sample_type='SNV_INDEL', + [GCNV_VARIANT3, GCNV_VARIANT4], intervals=sv_intervals, gene_ids=['ENSG00000275023'], omit_data_type='SNV_INDEL', ) await self._assert_expected_search( @@ -619,11 +619,11 @@ async def test_location_search(self): ) await self._assert_expected_search( - [VARIANT1, VARIANT2], omit_sample_type='SV_WES', **EXCLUDE_LOCATION_SEARCH, + [VARIANT1, VARIANT2], omit_data_type='SV_WES', **EXCLUDE_LOCATION_SEARCH, ) await self._assert_expected_search( - [GCNV_VARIANT1, GCNV_VARIANT2], 
intervals=sv_intervals, exclude_intervals=True, omit_sample_type='SNV_INDEL', + [GCNV_VARIANT1, GCNV_VARIANT2], intervals=sv_intervals, exclude_intervals=True, omit_data_type='SNV_INDEL', ) await self._assert_expected_search( @@ -631,18 +631,18 @@ async def test_location_search(self): ) await self._assert_expected_search( - [SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT], omit_sample_type='SV_WES', + [SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT], omit_data_type='SV_WES', intervals=LOCATION_SEARCH['intervals'][-1:], gene_ids=LOCATION_SEARCH['gene_ids'][:1] ) await self._assert_expected_search( [GCNV_VARIANT4], padded_interval={'chrom': '17', 'start': 38720781, 'end': 38738703, 'padding': 0.2}, - omit_sample_type='SNV_INDEL', + omit_data_type='SNV_INDEL', ) await self._assert_expected_search( [], padded_interval={'chrom': '17', 'start': 38720781, 'end': 38738703, 'padding': 0.1}, - omit_sample_type='SNV_INDEL', + omit_data_type='SNV_INDEL', ) await self._assert_expected_search( @@ -661,20 +661,20 @@ async def test_location_search(self): ) async def test_variant_id_search(self): - await self._assert_expected_search([VARIANT2], omit_sample_type='SV_WES', **RSID_SEARCH) + await self._assert_expected_search([VARIANT2], omit_data_type='SV_WES', **RSID_SEARCH) - await self._assert_expected_search([VARIANT1], omit_sample_type='SV_WES', **VARIANT_ID_SEARCH) + await self._assert_expected_search([VARIANT1], omit_data_type='SV_WES', **VARIANT_ID_SEARCH) await self._assert_expected_search( - [VARIANT1], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][:1], + [VARIANT1], omit_data_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][:1], ) await self._assert_expected_search( - [], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][1:], + [], omit_data_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][1:], ) variant_keys = ['suffix_95340_DUP', 'suffix_140608_DUP'] - await self._assert_expected_search([GCNV_VARIANT1, 
GCNV_VARIANT4], omit_sample_type='SNV_INDEL', variant_keys=variant_keys) + await self._assert_expected_search([GCNV_VARIANT1, GCNV_VARIANT4], omit_data_type='SNV_INDEL', variant_keys=variant_keys) await self._assert_expected_search([VARIANT1, GCNV_VARIANT1, GCNV_VARIANT4], variant_keys=variant_keys, **VARIANT_ID_SEARCH) @@ -768,15 +768,15 @@ async def test_frequency_filter(self): ) await self._assert_expected_search( - [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'ac': 4}}, omit_sample_type='SV_WES', + [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'ac': 4}}, omit_data_type='SV_WES', ) await self._assert_expected_search( - [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'hh': 1}}, omit_sample_type='SV_WES', + [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'hh': 1}}, omit_data_type='SV_WES', ) await self._assert_expected_search( - [VARIANT4], frequencies={'seqr': {'ac': 4, 'hh': 0}}, omit_sample_type='SV_WES', + [VARIANT4], frequencies={'seqr': {'ac': 4, 'hh': 0}}, omit_data_type='SV_WES', ) await self._assert_expected_search( @@ -788,11 +788,11 @@ async def test_frequency_filter(self): ) await self._assert_expected_search( - [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05}}, omit_sample_type='SV_WES', + [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05}}, omit_data_type='SV_WES', ) await self._assert_expected_search( - [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05, 'hh': 1}}, omit_sample_type='SV_WES', + [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05, 'hh': 1}}, omit_data_type='SV_WES', ) await self._assert_expected_search( @@ -806,27 +806,27 @@ async def test_frequency_filter(self): await self._assert_expected_search( [VARIANT4], frequencies={'seqr': {'af': 0.2}, 'gnomad_genomes': {'ac': 50}}, - omit_sample_type='SV_WES', + omit_data_type='SV_WES', ) await self._assert_expected_search( [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], 
frequencies={'seqr': {}, 'gnomad_genomes': {'af': None}}, - omit_sample_type='SV_WES', + omit_data_type='SV_WES', ) annotations = {'splice_ai': '0.0'} # Ensures no variants are filtered out by annotation/path filters await self._assert_expected_search( - [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES', + [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_data_type='SV_WES', annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'likely_pathogenic', 'vus_or_conflicting']}, ) await self._assert_expected_search( - [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES', + [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_data_type='SV_WES', annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'vus_or_conflicting']}, ) async def test_annotations_filter(self): - await self._assert_expected_search([VARIANT2], pathogenicity={'hgmd': ['hgmd_other']}, omit_sample_type='SV_WES') + await self._assert_expected_search([VARIANT2], pathogenicity={'hgmd': ['hgmd_other']}, omit_data_type='SV_WES') pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting', 'benign']} await self._assert_expected_search( @@ -878,7 +878,7 @@ async def test_annotations_filter(self): await self._assert_expected_search( [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT], - gene_ids=LOCATION_SEARCH['gene_ids'][:1], annotations=annotations, omit_sample_type='SV_WES', + gene_ids=LOCATION_SEARCH['gene_ids'][:1], annotations=annotations, omit_data_type='SV_WES', ) annotations['other'] = annotations['other'][:1] @@ -904,17 +904,17 @@ async def test_secondary_annotations_filter(self): annotations_2 = {'other': ['intron_variant']} await self._assert_expected_search( - [[VARIANT3, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_4]], inheritance_mode='compound_het', omit_sample_type='SV_WES', + [[VARIANT3, 
SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_4]], inheritance_mode='compound_het', omit_data_type='SV_WES', annotations=annotations_1, annotations_secondary=annotations_2, ) await self._assert_expected_search( - [VARIANT2, [VARIANT3, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_4]], inheritance_mode='recessive', omit_sample_type='SV_WES', + [VARIANT2, [VARIANT3, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_4]], inheritance_mode='recessive', omit_data_type='SV_WES', annotations=annotations_1, annotations_secondary=annotations_2, ) await self._assert_expected_search( - [[VARIANT3, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_4]], inheritance_mode='recessive', omit_sample_type='SV_WES', + [[VARIANT3, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_4]], inheritance_mode='recessive', omit_data_type='SV_WES', annotations=annotations_2, annotations_secondary=annotations_1, ) @@ -922,24 +922,24 @@ async def test_secondary_annotations_filter(self): gcnv_annotations_2 = {'structural_consequence': ['LOF'], 'structural': []} await self._assert_expected_search( - [[GCNV_VARIANT3, GCNV_VARIANT4]], omit_sample_type='SNV_INDEL', inheritance_mode='compound_het', + [[GCNV_VARIANT3, GCNV_VARIANT4]], omit_data_type='SNV_INDEL', inheritance_mode='compound_het', annotations=gcnv_annotations_1, annotations_secondary=gcnv_annotations_2, ) await self._assert_expected_search( - [GCNV_VARIANT3, [GCNV_VARIANT3, GCNV_VARIANT4]], omit_sample_type='SNV_INDEL', inheritance_mode='recessive', + [GCNV_VARIANT3, [GCNV_VARIANT3, GCNV_VARIANT4]], omit_data_type='SNV_INDEL', inheritance_mode='recessive', annotations=gcnv_annotations_2, annotations_secondary=gcnv_annotations_1, ) # Do not return pairs where annotations match in a non-paired gene gcnv_annotations_no_pair = {'structural_consequence': ['COPY_GAIN']} await self._assert_expected_search( - [], omit_sample_type='SNV_INDEL', inheritance_mode='compound_het', + [], omit_data_type='SNV_INDEL', inheritance_mode='compound_het', annotations=gcnv_annotations_1, 
annotations_secondary=gcnv_annotations_no_pair, ) await self._assert_expected_search( - [], omit_sample_type='SNV_INDEL', inheritance_mode='compound_het', + [], omit_data_type='SNV_INDEL', inheritance_mode='compound_het', annotations={**gcnv_annotations_1, **gcnv_annotations_no_pair}, ) @@ -969,7 +969,7 @@ async def test_secondary_annotations_filter(self): pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting']} await self._assert_expected_search( - [VARIANT2, [VARIANT3, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_4]], inheritance_mode='recessive', omit_sample_type='SV_WES', + [VARIANT2, [VARIANT3, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_4]], inheritance_mode='recessive', omit_data_type='SV_WES', annotations=annotations_2, annotations_secondary=annotations_1, pathogenicity=pathogenicity, ) @@ -1012,25 +1012,25 @@ async def test_secondary_annotations_filter(self): screen_annotations = {'SCREEN': ['CTCF-only']} await self._assert_expected_search( - [], inheritance_mode='recessive', omit_sample_type='SV_WES', + [], inheritance_mode='recessive', omit_data_type='SV_WES', annotations=screen_annotations, annotations_secondary=annotations_1, ) await self._assert_expected_search( - [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES', + [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_data_type='SV_WES', annotations=screen_annotations, annotations_secondary=annotations_2, ) await self._assert_expected_search( [VARIANT2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]], inheritance_mode='recessive', annotations=screen_annotations, annotations_secondary=selected_transcript_annotations, - pathogenicity=pathogenicity, omit_sample_type='SV_WES', + pathogenicity=pathogenicity, omit_data_type='SV_WES', ) await self._assert_expected_search( [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]], annotations={**selected_transcript_annotations, **screen_annotations}, 
annotations_secondary=annotations_2, - inheritance_mode='recessive', omit_sample_type='SV_WES', + inheritance_mode='recessive', omit_data_type='SV_WES', ) async def test_in_silico_filter(self): @@ -1053,7 +1053,7 @@ async def test_in_silico_filter(self): sv_in_silico = {'strvctvre': 0.1, 'requireScore': True} await self._assert_expected_search( - [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], omit_sample_type='SNV_INDEL', in_silico=sv_in_silico, + [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], omit_data_type='SNV_INDEL', in_silico=sv_in_silico, ) await self._assert_expected_search( @@ -1074,7 +1074,7 @@ async def test_search_errors(self): self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675_1, NA19678') search_body = get_hail_search_body( - intervals=LOCATION_SEARCH['intervals'] + ['1:1-99999999999'], omit_sample_type='SV_WES', + intervals=LOCATION_SEARCH['intervals'] + ['1:1-99999999999'], omit_data_type='SV_WES', ) async with self.client.request('POST', '/search', json=search_body) as resp: self.assertEqual(resp.status, 400) @@ -1090,7 +1090,7 @@ async def test_sort(self): await self._assert_expected_search( [_sorted(GCNV_VARIANT2, [0]), _sorted(GCNV_VARIANT3, [0]), _sorted(GCNV_VARIANT4, [0]), - _sorted(GCNV_VARIANT1, [3])], omit_sample_type='SNV_INDEL', sort='protein_consequence', + _sorted(GCNV_VARIANT1, [3])], omit_data_type='SNV_INDEL', sort='protein_consequence', ) await self._assert_expected_search( @@ -1107,7 +1107,7 @@ async def test_sort(self): await self._assert_expected_search( [_sorted(VARIANT4, [2, 2]), _sorted(SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [12, 26]), _sorted(SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT, [26, 26])], - omit_sample_type='SV_WES', sort='protein_consequence', + omit_data_type='SV_WES', sort='protein_consequence', annotations={'other': ['non_coding_transcript_exon_variant'], 'splice_ai': '0'}, ) @@ -1159,28 +1159,28 @@ async def 
test_sort(self): await self._assert_expected_search( [_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT2, [-0.19699999690055847]), - _sorted(VARIANT1, [0]), _sorted(MULTI_FAMILY_VARIANT, [0])], omit_sample_type='SV_WES', sort='revel', + _sorted(VARIANT1, [0]), _sorted(MULTI_FAMILY_VARIANT, [0])], omit_data_type='SV_WES', sort='revel', ) await self._assert_expected_search( [_sorted(MULTI_FAMILY_VARIANT, [-0.009999999776482582]), _sorted(VARIANT2, [0]), _sorted(VARIANT4, [0]), - _sorted(VARIANT1, [0])], omit_sample_type='SV_WES', sort='splice_ai', + _sorted(VARIANT1, [0])], omit_data_type='SV_WES', sort='splice_ai', ) await self._assert_expected_search( [_sorted(VARIANT2, [-0.9977999925613403, -0.9977999925613403]), _sorted(VARIANT1, [0, 0]), - _sorted(MULTI_FAMILY_VARIANT, [0, 0]), _sorted(VARIANT4, [0, 0])], omit_sample_type='SV_WES', sort='alphamissense', + _sorted(MULTI_FAMILY_VARIANT, [0, 0]), _sorted(VARIANT4, [0, 0])], omit_data_type='SV_WES', sort='alphamissense', ) sort = 'in_omim' await self._assert_expected_search( [_sorted(MULTI_FAMILY_VARIANT, [0, -2]), _sorted(VARIANT2, [0, -1]), _sorted(VARIANT4, [0, -1]), _sorted(VARIANT1, [1, 0])], - omit_sample_type='SV_WES', sort=sort, sort_metadata=OMIM_SORT_METADATA, + omit_data_type='SV_WES', sort=sort, sort_metadata=OMIM_SORT_METADATA, ) await self._assert_expected_search( [_sorted(GCNV_VARIANT3, [-1]), _sorted(GCNV_VARIANT4, [-1]), _sorted(GCNV_VARIANT1, [0]), _sorted(GCNV_VARIANT2, [0])], - omit_sample_type='SNV_INDEL', sort=sort, sort_metadata=OMIM_SORT_METADATA, + omit_data_type='SNV_INDEL', sort=sort, sort_metadata=OMIM_SORT_METADATA, ) await self._assert_expected_search( @@ -1191,19 +1191,19 @@ async def test_sort(self): await self._assert_expected_search( [_sorted(VARIANT2, [0, -1]), _sorted(MULTI_FAMILY_VARIANT, [1, -1]), _sorted(VARIANT1, [1, 0]), _sorted(VARIANT4, [1, 0])], - omit_sample_type='SV_WES', sort=sort, sort_metadata=['ENSG00000177000'], + omit_data_type='SV_WES', sort=sort, 
sort_metadata=['ENSG00000177000'], ) constraint_sort_metadata = {'ENSG00000177000': 2, 'ENSG00000275023': 3, 'ENSG00000097046': 4} sort = 'constraint' await self._assert_expected_search( [_sorted(VARIANT2, [2, 2]), _sorted(MULTI_FAMILY_VARIANT, [4, 2]), _sorted(VARIANT4, [4, 4]), - _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort=sort, sort_metadata=constraint_sort_metadata, + _sorted(VARIANT1, [None, None])], omit_data_type='SV_WES', sort=sort, sort_metadata=constraint_sort_metadata, ) await self._assert_expected_search( [_sorted(GCNV_VARIANT3, [3]), _sorted(GCNV_VARIANT4, [3]), _sorted(GCNV_VARIANT1, [None]), - _sorted(GCNV_VARIANT2, [None])], omit_sample_type='SNV_INDEL', sort=sort, sort_metadata=constraint_sort_metadata, + _sorted(GCNV_VARIANT2, [None])], omit_data_type='SNV_INDEL', sort=sort, sort_metadata=constraint_sort_metadata, ) await self._assert_expected_search( @@ -1215,7 +1215,7 @@ async def test_sort(self): await self._assert_expected_search( [_sorted(VARIANT2, [3, 3]), _sorted(MULTI_FAMILY_VARIANT, [None, 3]), _sorted(VARIANT1, [None, None]), - _sorted(VARIANT4, [None, None])], omit_sample_type='SV_WES', sort='prioritized_gene', + _sorted(VARIANT4, [None, None])], omit_data_type='SV_WES', sort='prioritized_gene', sort_metadata={'ENSG00000177000': 3}, ) @@ -1234,12 +1234,12 @@ async def test_sort(self): await self._assert_expected_search( [[_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT3, [0])], _sorted(VARIANT2, [-0.19699999690055847])], - sort='revel', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS, + sort='revel', inheritance_mode='recessive', omit_data_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS, ) await self._assert_expected_search( [[_sorted(VARIANT3, [-0.009999999776482582]), _sorted(VARIANT4, [0])], _sorted(VARIANT2, [0])], - sort='splice_ai', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS, + sort='splice_ai', inheritance_mode='recessive', 
omit_data_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS, ) async def test_multi_data_type_comp_het_sort(self): diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index dda9b2c502..aef30f3304 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -3,12 +3,12 @@ FAMILY_3_SAMPLE = { 'sample_id': 'NA20870', 'individual_guid': 'I000007_na20870', 'family_guid': 'F000003_3', - 'project_guid': 'R0001_1kg', 'affected': 'A', + 'project_guid': 'R0001_1kg', 'affected': 'A', 'sample_type': 'WES', } FAMILY_2_VARIANT_SAMPLE_DATA_WITH_SEX = {'SNV_INDEL': [ - {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'}, - {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'}, - {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'}, + {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sample_type': 'WES', 'sex': 'F'}, + {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'sex': 'M'}, + {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'sex': 'F'}, ]} FAMILY_2_VARIANT_SAMPLE_DATA = deepcopy(FAMILY_2_VARIANT_SAMPLE_DATA_WITH_SEX) for s in FAMILY_2_VARIANT_SAMPLE_DATA['SNV_INDEL']: @@ -16,9 +16,9 @@ EXPECTED_SAMPLE_DATA_WITH_SEX = { 'SV_WES': [ - {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'}, - {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 
'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'}, - {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'} + {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sample_type': 'WES', 'sex': 'F'}, + {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'sex': 'M'}, + {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'sex': 'F'} ], } EXPECTED_SAMPLE_DATA_WITH_SEX.update(FAMILY_2_VARIANT_SAMPLE_DATA_WITH_SEX) @@ -36,8 +36,8 @@ FAMILY_1_SAMPLE_DATA = { 'SNV_INDEL': [ - {'sample_id': 'NA19675_1', 'individual_guid': 'I000001_na19675', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'affected': 'A'}, - {'sample_id': 'NA19678', 'individual_guid': 'I000002_na19678', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'affected': 'N'}, + {'sample_id': 'NA19675_1', 'individual_guid': 'I000001_na19675', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'sample_type': 'WES', 'affected': 'A'}, + {'sample_id': 'NA19678', 'individual_guid': 'I000002_na19678', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'sample_type': 'WES', 'affected': 'N'}, ], } FAMILY_2_MISSING_SAMPLE_DATA = deepcopy(FAMILY_1_SAMPLE_DATA) @@ -45,7 +45,7 @@ s['family_guid'] = 'F000002_2' FAMILY_2_MITO_SAMPLE_DATA = {'MITO': [ - {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N'}, + {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES'}, ]} FAMILY_2_ALL_SAMPLE_DATA = 
deepcopy(FAMILY_2_VARIANT_SAMPLE_DATA) FAMILY_2_ALL_SAMPLE_DATA.update(FAMILY_2_MITO_SAMPLE_DATA) @@ -53,11 +53,11 @@ ALL_AFFECTED_SAMPLE_DATA = deepcopy(EXPECTED_SAMPLE_DATA) ALL_AFFECTED_SAMPLE_DATA.update(FAMILY_2_MITO_SAMPLE_DATA) FAMILY_5_SAMPLE = { - 'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', + 'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', } ALL_AFFECTED_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_5_SAMPLE) FAMILY_11_SAMPLE = { - 'sample_id': 'NA20885', 'individual_guid': 'I000015_na20885', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'A', + 'sample_id': 'NA20885', 'individual_guid': 'I000015_na20885', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_type': 'WGS', } MULTI_PROJECT_SAMPLE_DATA = deepcopy(FAMILY_2_VARIANT_SAMPLE_DATA) MULTI_PROJECT_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_11_SAMPLE) @@ -65,9 +65,9 @@ MULTI_PROJECT_MISSING_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_11_SAMPLE) SV_WGS_SAMPLE_DATA_WITH_SEX = {'SV_WGS': [{'sex': 'M', **FAMILY_11_SAMPLE}, { - 'sample_id': 'NA20884', 'individual_guid': 'I000025_na20884', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sex': 'M', + 'sample_id': 'NA20884', 'individual_guid': 'I000025_na20884', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sample_type': 'WGS', 'sex': 'M', }, { - 'sample_id': 'NA20883', 'individual_guid': 'I000035_na20883', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sex': 'F', + 'sample_id': 'NA20883', 'individual_guid': 'I000035_na20883', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sample_type': 'WGS', 'sex': 'F', }]} SV_WGS_SAMPLE_DATA = deepcopy(SV_WGS_SAMPLE_DATA_WITH_SEX) for s in SV_WGS_SAMPLE_DATA['SV_WGS']: 
@@ -957,10 +957,10 @@ } -def get_hail_search_body(genome_version='GRCh38', num_results=100, sample_data=None, omit_sample_type=None, **search_body): +def get_hail_search_body(genome_version='GRCh38', num_results=100, sample_data=None, omit_data_type=None, **search_body): sample_data = sample_data or EXPECTED_SAMPLE_DATA - if omit_sample_type: - sample_data = {k: v for k, v in sample_data.items() if k != omit_sample_type} + if omit_data_type: + sample_data = {k: v for k, v in sample_data.items() if k != omit_data_type} search = { 'sample_data': sample_data, diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 8a5002e078..52e8ce8e72 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -1,4 +1,6 @@ from collections import defaultdict + +from django.contrib.postgres.aggregates import ArrayAgg from django.db.models import F, Min, Count from urllib3.connectionpool import connection_from_url @@ -152,9 +154,8 @@ def _get_sample_data(samples, inheritance_filter=None, inheritance_mode=None, ** sample_data_by_data_type = defaultdict(list) for s in sample_data: dataset_type = s.pop('dataset_type') - sample_type = s.pop('sample_type') - s['sample_id'] = s.pop('individual__individual_id') - data_type_key = search_data_type(dataset_type, sample_type) + s['sample_id'] = s.pop('individual__individual_id') # Note: set sample_id to individual_id + data_type_key = search_data_type(dataset_type, s['sample_type']) sample_data_by_data_type[data_type_key].append(s) return sample_data_by_data_type @@ -242,8 +243,12 @@ def validate_hail_backend_no_location_search(samples): family_count=Count('individual__family_id', distinct=True), project_count=Count('individual__family__project_id', distinct=True), ) + distinct_projects = samples.filter(dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS).values( + 'individual__family__project_id' + ).distinct() from seqr.utils.search.utils import 
InvalidSearchException - if sample_counts and (len(sample_counts) > 1 or sample_counts[0]['project_count'] > 1): - raise InvalidSearchException('Location must be specified to search across multiple projects') - if sample_counts and sample_counts[0]['family_count'] > MAX_FAMILY_COUNTS[sample_counts[0]['sample_type']]: - raise InvalidSearchException('Location must be specified to search across multiple families in large projects') + if sample_counts: + if distinct_projects.count() > 1 or sample_counts[0]['project_count'] > 1: + raise InvalidSearchException('Location must be specified to search across multiple projects') + if sample_counts[0]['family_count'] > MAX_FAMILY_COUNTS[sample_counts[0]['sample_type']]: + raise InvalidSearchException('Location must be specified to search across multiple families in large projects') diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 65b25977cc..0274b913b8 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -18,7 +18,7 @@ SV_WGS_SAMPLE_DATA = [{ 'individual_guid': 'I000018_na21234', 'family_guid': 'F000014_14', 'project_guid': 'R0004_non_analyst_project', - 'affected': 'A', 'sample_id': 'NA21234', + 'affected': 'A', 'sample_id': 'NA21234', 'sample_type': 'WGS', }] EXPECTED_MITO_SAMPLE_DATA = deepcopy(FAMILY_2_MITO_SAMPLE_DATA) From 3929eab04adb28950facaa47144b638658b1354e Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 2 Aug 2024 15:10:34 -0400 Subject: [PATCH 545/736] hold off on search validation --- seqr/utils/search/hail_search_utils.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 52e8ce8e72..678cafbb9f 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -1,6 +1,5 @@ from collections import defaultdict -from 
django.contrib.postgres.aggregates import ArrayAgg from django.db.models import F, Min, Count from urllib3.connectionpool import connection_from_url @@ -243,12 +242,8 @@ def validate_hail_backend_no_location_search(samples): family_count=Count('individual__family_id', distinct=True), project_count=Count('individual__family__project_id', distinct=True), ) - distinct_projects = samples.filter(dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS).values( - 'individual__family__project_id' - ).distinct() from seqr.utils.search.utils import InvalidSearchException - if sample_counts: - if distinct_projects.count() > 1 or sample_counts[0]['project_count'] > 1: - raise InvalidSearchException('Location must be specified to search across multiple projects') - if sample_counts[0]['family_count'] > MAX_FAMILY_COUNTS[sample_counts[0]['sample_type']]: - raise InvalidSearchException('Location must be specified to search across multiple families in large projects') + if sample_counts and (len(sample_counts) > 1 or sample_counts[0]['project_count'] > 1): + raise InvalidSearchException('Location must be specified to search across multiple projects') + if sample_counts and sample_counts[0]['family_count'] > MAX_FAMILY_COUNTS[sample_counts[0]['sample_type']]: + raise InvalidSearchException('Location must be specified to search across multiple families in large projects') From 535c55adbfd5462bfaa18e54ae74cdd3e3e70e02 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 2 Aug 2024 16:43:52 -0400 Subject: [PATCH 546/736] handle variant lookup! 
--- .../{ => WES}/F000002_2.ht/.README.txt.crc | Bin .../{ => WES}/F000002_2.ht/._SUCCESS.crc | Bin .../F000002_2.ht/.metadata.json.gz.crc | Bin .../{ => WES}/F000002_2.ht/README.txt | 0 .../families/{ => WES}/F000002_2.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin .../F000002_2.ht/globals/metadata.json.gz | Bin .../F000002_2.ht/globals/parts/.part-0.crc | Bin .../F000002_2.ht/globals/parts/part-0 | Bin .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../{ => WES}/F000002_2.ht/metadata.json.gz | Bin .../F000002_2.ht/rows/.metadata.json.gz.crc | Bin .../F000002_2.ht/rows/metadata.json.gz | Bin ...0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.crc | Bin ...art-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4 | Bin hail_search/queries/base.py | 35 ++++++++++++++---- ...eck_for_new_samples_from_pipeline_tests.py | 8 ++-- seqr/utils/search/hail_search_utils_tests.py | 6 +-- 21 files changed, 34 insertions(+), 15 deletions(-) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/.README.txt.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/._SUCCESS.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/README.txt (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/_SUCCESS (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/parts/.part-0.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/globals/parts/part-0 (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ 
=> WES}/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.index.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/index (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/parts/.part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.crc (100%) rename hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/{ => WES}/F000002_2.ht/rows/parts/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4 (100%) diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/.README.txt.crc similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/.README.txt.crc rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/.README.txt.crc diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/._SUCCESS.crc diff --git 
a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/README.txt b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/README.txt similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/README.txt rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/README.txt diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/_SUCCESS b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/globals/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/globals/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/globals/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/globals/metadata.json.gz rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/globals/metadata.json.gz diff 
--git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/globals/parts/.part-0.crc similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/globals/parts/.part-0.crc rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/globals/parts/.part-0.crc diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/globals/parts/part-0 similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/globals/parts/part-0 rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/globals/parts/part-0 diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.index.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.index.crc rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.metadata.json.gz.crc rename to 
hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/index b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/index rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/index diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/index/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/metadata.json.gz rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc similarity index 100% rename from 
hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/rows/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/rows/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/rows/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/rows/metadata.json.gz rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/rows/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/rows/parts/.part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.crc similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/rows/parts/.part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.crc rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/rows/parts/.part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4.crc diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4 b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/rows/parts/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4 similarity index 100% rename from hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/F000002_2.ht/rows/parts/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4 rename to hail_search/fixtures/GRCh38/ONT_SNV_INDEL/families/WES/F000002_2.ht/rows/parts/part-0-7e735aac-e66b-4a34-9b45-5fdd65e9a5b4 diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 99ca936ac6..f677a540c7 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -1,6 +1,7 @@ from aiohttp.web import HTTPBadRequest, HTTPNotFound from collections import defaultdict, 
namedtuple import hail as hl +import hailtop.fs as hfs import logging import os @@ -266,6 +267,9 @@ def _get_table_path(cls, path, use_ssd_dir=False): return f'{SSD_DATASETS_DIR if use_ssd_dir else DATASETS_DIR}/{cls.GENOME_VERSION}/{cls.DATA_TYPE}/{path}' def _read_table(self, path, drop_globals=None, use_ssd_dir=False, skip_missing_field=None): + if not hfs.exists(self._get_table_path(path, use_ssd_dir=use_ssd_dir)): + return None + table_path = self._get_table_path(path, use_ssd_dir=use_ssd_dir) if 'variant_ht' in self._load_table_kwargs: ht = self._query_table_annotations(self._load_table_kwargs['variant_ht'], table_path) @@ -296,8 +300,19 @@ def _parse_sample_data(self, sample_data): def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_partitions=MAX_PARTITIONS, **kwargs): if len(project_samples) == 1: project_guid = list(project_samples.keys())[0] - sample_type = list(project_samples[project_guid].values())[0][0]['sample_type'] - project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) + # for variant lookup, project_samples looks like + # {: {: True, : True}, : ...} + # for variant search, project_samples looks like + # {: {: [, , ...], : ...}, : ...} + first_family_samples = list(project_samples[project_guid].values())[0] + if type(first_family_samples) is bool: + project_ht = ( + self._read_table(f'projects/WES/{project_guid}.ht', use_ssd_dir=True) or + self._read_table(f'projects/WGS/{project_guid}.ht', use_ssd_dir=True) + ) + else: + sample_type = first_family_samples[0]['sample_type'] + project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) return self._filter_entries_table(project_ht, project_samples[project_guid], **kwargs) # Need to chunk tables or else evaluating table globals throws LineTooLong exception @@ -309,12 +324,16 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for 
project_guid, project_sample_data in project_samples.items(): - sample_type = list(project_sample_data.values())[0][0]['sample_type'] - project_ht = self._read_table( - f'projects/{sample_type}/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing else None, - ) + first_family_samples = list(project_sample_data.values())[0] + if type(first_family_samples) is bool: + project_ht = ( + self._read_table(f'projects/WES/{project_guid}.ht', use_ssd_dir=True) or + self._read_table(f'projects/WGS/{project_guid}.ht', use_ssd_dir=True) + ) + else: + sample_type = first_family_samples[0]['sample_type'] + project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) + if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 193d7dee86..c08ff4eae5 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -213,11 +213,11 @@ def test_command(self, mock_email, mock_airtable_utils): EXISTING_SAMPLE_GUID, REPLACED_SAMPLE_GUID, NEW_SAMPLE_GUID_P3, NEW_SAMPLE_GUID_P4, }, has_additional_requests=True, reload_calls=[ {**search_body, 'sample_data': {'SNV_INDEL': [ - {'individual_guid': 'I000017_na20889', 'family_guid': 'F000012_12', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_id': 'NA20889'}, - {'individual_guid': 'I000016_na20888', 'family_guid': 'F000012_12', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_id': 'NA20888'}, + {'individual_guid': 'I000017_na20889', 'family_guid': 'F000012_12', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_id': 'NA20889', 'sample_type': 'WES'}, + {'individual_guid': 'I000016_na20888', 'family_guid': 'F000012_12', 'project_guid': 
'R0003_test', 'affected': 'A', 'sample_id': 'NA20888', 'sample_type': 'WES'}, ]}}, {**search_body, 'sample_data': {'SNV_INDEL': [ - {'individual_guid': 'I000018_na21234', 'family_guid': 'F000014_14', 'project_guid': 'R0004_non_analyst_project', 'affected': 'A', 'sample_id': 'NA21234'}, + {'individual_guid': 'I000018_na21234', 'family_guid': 'F000014_14', 'project_guid': 'R0004_non_analyst_project', 'affected': 'A', 'sample_id': 'NA21234', 'sample_type': 'WES'}, ]}}, ], reload_annotations_logs=[ 'Reloading shared annotations for 3 SNV_INDEL GRCh38 saved variants (3 unique)', 'Fetched 1 additional variants', 'Fetched 1 additional variants', 'Updated 2 saved variants', @@ -383,7 +383,7 @@ def test_gcnv_command(self): } self._test_success('GRCh37/GCNV', metadata, dataset_type='SV', sample_guids={f'S00000{GUID_ID}_na20872', f'S00000{GUID_ID}_na20889'}, reload_calls=[{ 'genome_version': 'GRCh37', 'num_results': 1, 'variant_ids': [], 'variant_keys': ['prefix_19107_DEL'], - 'sample_data': {'SV_WES': [{'individual_guid': 'I000017_na20889', 'family_guid': 'F000012_12', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_id': 'NA20889'}]}, + 'sample_data': {'SV_WES': [{'individual_guid': 'I000017_na20889', 'family_guid': 'F000012_12', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_id': 'NA20889', 'sample_type': 'WES'}]}, }], reload_annotations_logs=['No additional saved variants to update']) self.mock_send_slack.assert_has_calls([ diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 0274b913b8..3818309757 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -151,7 +151,7 @@ def test_query_variants(self): query_variants(self.results_model, user=self.user) self._test_expected_search_call( inheritance_mode='recessive', dataset_type='SNV_INDEL', secondary_dataset_type='SNV_INDEL', - search_fields=['annotations', 'annotations_secondary'], 
omit_sample_type='SV_WES', + search_fields=['annotations', 'annotations_secondary'], omit_data_type='SV_WES', ) self.search_model.search['inheritance']['mode'] = 'x_linked_recessive' @@ -159,7 +159,7 @@ def test_query_variants(self): self._test_expected_search_call( inheritance_mode='x_linked_recessive', dataset_type='SNV_INDEL', secondary_dataset_type='SNV_INDEL', search_fields=['annotations', 'annotations_secondary'], sample_data=EXPECTED_SAMPLE_DATA_WITH_SEX, - omit_sample_type='SV_WES', + omit_data_type='SV_WES', ) self.results_model.families.set(Family.objects.filter(id__in=[2, 11, 14])) @@ -313,7 +313,7 @@ def test_get_single_variant(self): get_single_variant(self.families, 'prefix_19107_DEL', user=self.user) self._test_minimal_search_call( variant_ids=[], variant_keys=['prefix_19107_DEL'], - num_results=1, sample_data=EXPECTED_SAMPLE_DATA, omit_sample_type='SNV_INDEL') + num_results=1, sample_data=EXPECTED_SAMPLE_DATA, omit_data_type='SNV_INDEL') get_single_variant(self.families, 'M-10195-C-A', user=self.user) self._test_minimal_search_call( From eeee1d04129c01052b4cfdd79b7a808c1e80377e Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 2 Aug 2024 16:51:15 -0400 Subject: [PATCH 547/736] isinstance --- hail_search/queries/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index f677a540c7..96c566aa68 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -305,7 +305,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ # for variant search, project_samples looks like # {: {: [, , ...], : ...}, : ...} first_family_samples = list(project_samples[project_guid].values())[0] - if type(first_family_samples) is bool: + if isinstance(first_family_samples, bool): project_ht = ( self._read_table(f'projects/WES/{project_guid}.ht', use_ssd_dir=True) or self._read_table(f'projects/WGS/{project_guid}.ht', use_ssd_dir=True) From 
498f560fbf941b4bf30f0fbcdf3e6f278c8c2500 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 5 Aug 2024 15:27:44 -0400 Subject: [PATCH 548/736] update sample in test --- hail_search/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index aef30f3304..255182d4af 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -57,7 +57,7 @@ } ALL_AFFECTED_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_5_SAMPLE) FAMILY_11_SAMPLE = { - 'sample_id': 'NA20885', 'individual_guid': 'I000015_na20885', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_type': 'WGS', + 'sample_id': 'NA20885', 'individual_guid': 'I000015_na20885', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_type': 'WES', } MULTI_PROJECT_SAMPLE_DATA = deepcopy(FAMILY_2_VARIANT_SAMPLE_DATA) MULTI_PROJECT_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_11_SAMPLE) From b66498259c9470d3a32fef071dce20e3606081c5 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 5 Aug 2024 15:31:49 -0400 Subject: [PATCH 549/736] another isinstance --- hail_search/queries/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 96c566aa68..dfcaf26e23 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -325,7 +325,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ sample_data = {} for project_guid, project_sample_data in project_samples.items(): first_family_samples = list(project_sample_data.values())[0] - if type(first_family_samples) is bool: + if isinstance(first_family_samples, bool): project_ht = ( self._read_table(f'projects/WES/{project_guid}.ht', use_ssd_dir=True) or self._read_table(f'projects/WGS/{project_guid}.ht', use_ssd_dir=True) From 36d848c02ad174277cde50e8ec4713f66035ffcb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: 
Tue, 6 Aug 2024 17:11:44 -0400 Subject: [PATCH 550/736] use single dag name --- seqr/views/utils/airflow_utils.py | 49 ++++++++++++++++--------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/seqr/views/utils/airflow_utils.py b/seqr/views/utils/airflow_utils.py index b47c0093fc..0be413da2a 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -16,6 +16,7 @@ logger = SeqrLogger(__name__) +DAG_NAME = 'LOADING_PIPELINE' AIRFLOW_AUTH_SCOPE = "https://www.googleapis.com/auth/cloud-platform" SEQR_V2_DATASETS_GS_PATH = 'gs://seqr-datasets/v02' SEQR_V3_PEDIGREE_GS_PATH = 'gs://seqr-loading-temp/v03' @@ -31,13 +32,13 @@ def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type individual_ids: list[str] = None, additional_project_files: dict = None): success = True - dag_name = f'v03_pipeline-{_dag_dataset_type(sample_type, dataset_type)}' project_guids = sorted([p.guid for p in projects]) updated_variables = { 'projects_to_run': project_guids, 'callset_path': data_path, 'sample_source': 'Broad_Internal' if is_internal else 'AnVIL', 'sample_type': sample_type, + 'dataset_type': _dag_dataset_type(sample_type, dataset_type), 'reference_genome': GENOME_VERSION_LOOKUP[genome_version], } @@ -47,18 +48,18 @@ def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type ) try: - _check_dag_running_state(dag_name) - _update_variables(dag_name, updated_variables) - _wait_for_dag_variable_update(dag_name, project_guids) - _trigger_dag(dag_name) + _check_dag_running_state() + _update_variables(updated_variables) + _wait_for_dag_variable_update(project_guids) + _trigger_dag() except Exception as e: logger_call = logger.warning if isinstance(e, DagRunningException) else logger.error logger_call(str(e), user) - _send_slack_msg_on_failure_trigger(e, dag_name, updated_variables, error_message) + _send_slack_msg_on_failure_trigger(e, updated_variables, error_message) success = False if success 
or success_slack_channel != SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL: - _send_load_data_slack_msg([success_message] + upload_info, success_slack_channel, dag_name, updated_variables) + _send_load_data_slack_msg([success_message] + upload_info, success_slack_channel, updated_variables) return success @@ -77,31 +78,31 @@ def write_data_loading_pedigree(project: Project, user: User): ) -def _send_load_data_slack_msg(messages: list[str], channel: str, dag_id: str, dag: dict): +def _send_load_data_slack_msg(messages: list[str], channel: str, dag: dict): message = '\n\n '.join(messages) message_content = f"""{message} - DAG {dag_id} is triggered with following: + DAG {DAG_NAME} is triggered with following: ```{json.dumps(dag, indent=4)}``` """ safe_post_to_slack(channel, message_content) -def _send_slack_msg_on_failure_trigger(e, dag_id, dag, error_message): +def _send_slack_msg_on_failure_trigger(e, dag, error_message): message_content = f"""{error_message}: {e} - DAG {dag_id} should be triggered with following: + DAG {DAG_NAME} should be triggered with following: ```{json.dumps(dag, indent=4)}``` """ safe_post_to_slack(SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, message_content) -def _check_dag_running_state(dag_id): - endpoint = 'dags/{}/dagRuns'.format(dag_id) +def _check_dag_running_state(): + endpoint = f'dags/{DAG_NAME}/dagRuns' resp = _make_airflow_api_request(endpoint, method='GET') dag_runs = resp['dag_runs'] if dag_runs and dag_runs[-1]['state'] == 'running': - raise DagRunningException(f'{dag_id} is running and cannot be triggered again.') + raise DagRunningException(f'{DAG_NAME} DAG is running and cannot be triggered again.') def _dag_dataset_type(sample_type: str, dataset_type: str): @@ -163,24 +164,24 @@ def _get_gs_pedigree_path(genome_version: str, sample_type: str, dataset_type: s return f'{SEQR_V3_PEDIGREE_GS_PATH}/{GENOME_VERSION_LOOKUP[genome_version]}/{sample_type}/{dataset_type}/pedigrees/' -def _wait_for_dag_variable_update(dag_id, projects): - 
dag_projects = _get_task_ids(dag_id) +def _wait_for_dag_variable_update(projects): + dag_projects = _get_task_ids() while all(p not in ''.join(dag_projects) for p in projects): - dag_projects = _get_task_ids(dag_id) + dag_projects = _get_task_ids() -def _update_variables(key, val): - endpoint = 'variables/{}'.format(key) +def _update_variables(val): + endpoint = f'variables/{DAG_NAME}' val_str = json.dumps(val) json_data = { - "key": key, + "key": DAG_NAME, "value": val_str } _make_airflow_api_request(endpoint, method='PATCH', json=json_data) -def _get_task_ids(dag_id): - endpoint = 'dags/{}/tasks'.format(dag_id) +def _get_task_ids(): + endpoint = f'dags/{DAG_NAME}/tasks' airflow_response = _make_airflow_api_request(endpoint, method='GET') tasks = airflow_response['tasks'] @@ -188,8 +189,8 @@ def _get_task_ids(dag_id): return task_ids -def _trigger_dag(dag_id): - endpoint = 'dags/{}/dagRuns'.format(dag_id) +def _trigger_dag(): + endpoint = f'dags/{DAG_NAME}/dagRuns' _make_airflow_api_request(endpoint, method='POST', json={}) From a3dd06ff585f35eace5a14085758f5921fe90945 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 6 Aug 2024 17:44:05 -0400 Subject: [PATCH 551/736] fix tests --- seqr/views/apis/anvil_workspace_api_tests.py | 14 +++++----- seqr/views/apis/data_manager_api_tests.py | 13 +++++----- seqr/views/utils/test_utils.py | 27 ++++++++++---------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index 2265fb7507..ce044695b2 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -473,7 +473,6 @@ class LoadAnvilDataAPITest(AirflowTestCase): fixtures = ['users', 'social_auth', 'reference_data', '1kg_project'] LOADING_PROJECT_GUID = f'P_{TEST_NO_PROJECT_WORKSPACE_NAME}' - DAG_NAME = 'v03_pipeline-SNV_INDEL' ADDITIONAL_REQUEST_COUNT = 1 @staticmethod @@ -483,6 +482,7 @@ def 
_get_dag_variable_overrides(additional_tasks_check): 'callset_path': 'test_path.vcf', 'sample_source': 'AnVIL', 'sample_type': 'WES', + 'dataset_type': 'SNV_INDEL', } if additional_tasks_check: variables.update({ @@ -769,6 +769,7 @@ def _assert_valid_operation(self, project, test_add_data=True): 'callset_path': 'gs://test_bucket/test_path.vcf', 'sample_source': 'AnVIL', 'sample_type': 'WES', + 'dataset_type': 'SNV_INDEL', 'reference_genome': genome_version, } sample_summary = '3 new' @@ -780,11 +781,10 @@ def _assert_valid_operation(self, project, test_add_data=True): Pedigree file has been uploaded to gs://seqr-datasets/v02/{version}/AnVIL_WES/{guid}/base/ - DAG {dag_id} is triggered with following: + DAG LOADING_PIPELINE is triggered with following: ```{dag}``` """.format(guid=project.guid, version=genome_version, workspace_name=project.workspace_name, project_name=project.name, sample_summary=sample_summary, - dag_id=self.DAG_NAME, dag=json.dumps(dag_json, indent=4), ) self.mock_slack.assert_called_with( @@ -837,23 +837,23 @@ def _test_mv_file_and_triggering_dag_exception(self, url, workspace, sample_data self.manager_user, detail=sample_data) self.mock_api_logger.error.assert_not_called() self.mock_airflow_logger.warning.assert_called_with( - f'{self.DAG_NAME} is running and cannot be triggered again.', self.manager_user) + 'LOADING_PIPELINE DAG is running and cannot be triggered again.', self.manager_user) self.mock_airtable_logger.error.assert_called_with( f'Airtable create "AnVIL Seqr Loading Requests Tracking" error: 400 Client Error: Bad Request for url: ' f'{MOCK_AIRTABLE_URL}/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking', self.manager_user) - slack_message_on_failure = """ERROR triggering AnVIL loading for project {guid}: {dag_id} is running and cannot be triggered again. + slack_message_on_failure = """ERROR triggering AnVIL loading for project {guid}: LOADING_PIPELINE DAG is running and cannot be triggered again. 
- DAG {dag_id} should be triggered with following: + DAG LOADING_PIPELINE should be triggered with following: ```{dag}``` """.format( guid=project.guid, - dag_id=self.DAG_NAME, dag=json.dumps({ 'projects_to_run': [project.guid], 'callset_path': 'gs://test_bucket/test_path.vcf', 'sample_source': 'AnVIL', 'sample_type': 'WES', + 'dataset_type': 'SNV_INDEL', 'reference_genome': genome_version, }, indent=4), ) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 81e77f0fac..888adcdbf0 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1527,8 +1527,6 @@ def _add_file_iter(self, stdout): class AnvilDataManagerAPITest(AirflowTestCase, DataManagerAPITest): fixtures = ['users', 'social_auth', '1kg_project', 'reference_data'] - DAG_NAME = 'v03_pipeline-MITO' - SECOND_DAG_NAME = 'v03_pipeline-GCNV' LOADING_PROJECT_GUID = 'R0004_non_analyst_project' PROJECTS = [PROJECT_GUID, LOADING_PROJECT_GUID] @@ -1587,6 +1585,7 @@ def _get_dag_variable_overrides(*args, **kwargs): 'callset_path': 'mito_callset.mt', 'sample_source': 'Broad_Internal', 'sample_type': 'WGS', + 'dataset_type': 'MITO', } @responses.activate @@ -1626,6 +1625,7 @@ def test_load_data(self, mock_temp_dir, mock_open): "callset_path": "gs://test_bucket/mito_callset.mt", "sample_source": "Broad_Internal", "sample_type": "WGS", + "dataset_type": "MITO", "reference_genome": "GRCh38" }""" message = f"""*test_pm_user@test.com* triggered loading internal WGS MITO data for 2 projects @@ -1634,12 +1634,13 @@ def test_load_data(self, mock_temp_dir, mock_open): Pedigree file has been uploaded to gs://seqr-datasets/v02/GRCh38/RDG_WGS_Broad_Internal/base/projects/R0004_non_analyst_project/ - DAG {self.DAG_NAME} is triggered with following: + DAG LOADING_PIPELINE is triggered with following: ```{dag_json}``` """ self.mock_slack.assert_called_once_with(SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, message) # Test loading trigger 
error + self.set_dag_trigger_error_response(status=400) self.mock_authorized_session.reset_mock() self.mock_slack.reset_mock() mock_open.reset_mock() @@ -1652,19 +1653,19 @@ def test_load_data(self, mock_temp_dir, mock_open): self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), {'success': True}) - self.assert_airflow_calls(trigger_error=True, secondary_dag_name=self.SECOND_DAG_NAME) + self.assert_airflow_calls(trigger_error=True, dataset_type='GCNV') self._has_expected_gs_calls(mock_open, 'SV', is_second_dag=True, sample_type='WES') self.mock_airflow_logger.warning.assert_not_called() self.mock_airflow_logger.error.assert_called_with(mock.ANY, self.pm_user) errors = [call.args[0] for call in self.mock_airflow_logger.error.call_args_list] for error in errors: - self.assertRegex(error, 'Connection refused by Responses') + self.assertRegex(error, '400 Client Error: Bad Request') dag_json = dag_json.replace('mito_callset.mt', 'sv_callset.vcf').replace( 'WGS', 'WES').replace('MITO', 'GCNV').replace('v01', 'v03') error_message = f"""ERROR triggering internal WES SV loading: {errors[0]} - DAG {self.SECOND_DAG_NAME} should be triggered with following: + DAG LOADING_PIPELINE should be triggered with following: ```{dag_json}``` """ self.mock_slack.assert_called_once_with(SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, error_message) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index bc9fe1ffb9..22e16195d1 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -568,6 +568,7 @@ def assert_no_extra_anvil_calls(self): MOCK_AIRFLOW_URL = 'http://testairflowserver' +DAG_NAME = 'LOADING_PIPELINE' PROJECT_GUID = 'R0001_1kg' @@ -575,7 +576,7 @@ class AirflowTestCase(AnvilAuthenticationTestCase): ADDITIONAL_REQUEST_COUNT = 0 def setUp(self): - self._dag_url = f'{MOCK_AIRFLOW_URL}/api/v1/dags/{self.DAG_NAME}' + self._dag_url = f'{MOCK_AIRFLOW_URL}/api/v1/dags/{DAG_NAME}' # check dag running state 
responses.add(responses.GET, f'{self._dag_url}/dagRuns', json={ @@ -591,8 +592,8 @@ def setUp(self): responses.add(responses.POST, f'{self._dag_url}/dagRuns', json={}) # update variables responses.add( - responses.PATCH, f'{MOCK_AIRFLOW_URL}/api/v1/variables/{self.DAG_NAME}', - json={'key': self.DAG_NAME, 'value': 'updated variables'}, + responses.PATCH, f'{MOCK_AIRFLOW_URL}/api/v1/variables/{DAG_NAME}', + json={'key': DAG_NAME, 'value': 'updated variables'}, ) # get task id self.add_dag_tasks_response(['R0006_test']) @@ -625,7 +626,7 @@ def add_dag_tasks_response(self, projects): tasks += [ {'task_id': 'create_dataproc_cluster'}, {'task_id': f'pyspark_compute_project_{project}'}, - {'task_id': f'pyspark_compute_variants_{self.DAG_NAME}'}, + {'task_id': f'pyspark_compute_variants_{DAG_NAME}'}, {'task_id': f'pyspark_export_project_{project}'}, {'task_id': 'scale_dataproc_cluster'}, {'task_id': f'skip_compute_project_subset_{project}'} @@ -634,17 +635,17 @@ def add_dag_tasks_response(self, projects): 'tasks': tasks, 'total_entries': len(tasks), }) - def set_dag_trigger_error_response(self): - responses.replace(responses.GET, f'{self._dag_url}/dagRuns', json={'dag_runs': [{ + def set_dag_trigger_error_response(self, status=200): + responses.replace(responses.GET, f'{self._dag_url}/dagRuns', status=status, json={'dag_runs': [{ 'conf': {}, - 'dag_id': self.DAG_NAME, + 'dag_id': DAG_NAME, 'dag_run_id': 'manual__2022-04-28T11:51:22.735124+00:00', 'end_date': None, 'execution_date': '2022-04-28T11:51:22.735124+00:00', 'external_trigger': True, 'start_date': '2022-04-28T11:51:25.626176+00:00', 'state': 'running'} ]}) - def assert_airflow_calls(self, trigger_error=False, additional_tasks_check=False, secondary_dag_name=None): + def assert_airflow_calls(self, trigger_error=False, additional_tasks_check=False, dataset_type=None): self.mock_airflow_logger.info.assert_not_called() # Test triggering anvil dags @@ -662,15 +663,15 @@ def assert_airflow_calls(self, 
trigger_error=False, additional_tasks_check=False 'callset_path': f'gs://test_bucket/{dag_variable_overrides["callset_path"]}', 'sample_source': dag_variable_overrides['sample_source'], 'sample_type': dag_variable_overrides['sample_type'], + 'dataset_type': dataset_type or dag_variable_overrides['dataset_type'], 'reference_genome': dag_variable_overrides.get('reference_genome', 'GRCh38'), } - self._assert_airflow_calls(self.DAG_NAME, dag_variables, call_count, secondary_dag_name) + self._assert_airflow_calls(dag_variables, call_count) - def _assert_airflow_calls(self, dag_name, dag_variables, call_count, secondary_dag_name, offset=0): + def _assert_airflow_calls(self, dag_variables, call_count, offset=0): dag_url = self._dag_url # check dag running state - dag_url = self._dag_url.replace(dag_name, secondary_dag_name) if secondary_dag_name else dag_url self.assertEqual(responses.calls[offset].request.url, f'{dag_url}/dagRuns') self.assertEqual(responses.calls[offset].request.method, "GET") @@ -678,10 +679,10 @@ def _assert_airflow_calls(self, dag_name, dag_variables, call_count, secondary_d return # update variables - self.assertEqual(responses.calls[offset+1].request.url, f'{MOCK_AIRFLOW_URL}/api/v1/variables/{dag_name}') + self.assertEqual(responses.calls[offset+1].request.url, f'{MOCK_AIRFLOW_URL}/api/v1/variables/{DAG_NAME}') self.assertEqual(responses.calls[offset+1].request.method, 'PATCH') self.assertDictEqual(json.loads(responses.calls[offset+1].request.body), { - 'key': dag_name, + 'key': DAG_NAME, 'value': json.dumps(dag_variables), }) From bdd9ab4c25db1190a46afc395cbb34cd652b254d Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 7 Aug 2024 14:32:23 -0400 Subject: [PATCH 552/736] sample_type in sample_data --- hail_search/queries/base.py | 17 +++++++++++------ hail_search/test_utils.py | 8 ++++---- .../check_for_new_samples_from_pipeline.py | 2 +- ...check_for_new_samples_from_pipeline_tests.py | 4 ++-- 4 files changed, 18 insertions(+), 13 
deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index dfcaf26e23..241896007c 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -288,10 +288,10 @@ def _query_table_annotations(ht, query_table_path): def _parse_sample_data(self, sample_data): families = set() - project_samples = defaultdict(lambda: defaultdict(list)) + project_samples = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) for s in sample_data: families.add(s['family_guid']) - project_samples[s['project_guid']][s['family_guid']].append(s) + project_samples[s['project_guid']][s['family_guid']][s['sample_type']].append(s) num_families = len(families) logger.info(f'Loading {self.DATA_TYPE} data for {num_families} families in {len(project_samples)} projects') @@ -303,7 +303,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ # for variant lookup, project_samples looks like # {: {: True, : True}, : ...} # for variant search, project_samples looks like - # {: {: [, , ...], : ...}, : ...} + # {: {: {: [, , ...], : ...}, : ...}, : ...} first_family_samples = list(project_samples[project_guid].values())[0] if isinstance(first_family_samples, bool): project_ht = ( @@ -311,7 +311,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ self._read_table(f'projects/WGS/{project_guid}.ht', use_ssd_dir=True) ) else: - sample_type = first_family_samples[0]['sample_type'] + sample_type = list(first_family_samples.keys())[0] project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) return self._filter_entries_table(project_ht, project_samples[project_guid], **kwargs) @@ -331,7 +331,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ self._read_table(f'projects/WGS/{project_guid}.ht', use_ssd_dir=True) ) else: - sample_type = first_family_samples[0]['sample_type'] + sample_type = list(first_family_samples.keys())[0] 
project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) if project_ht is None: @@ -359,7 +359,7 @@ def import_filtered_table(self, project_samples, num_families, intervals=None, * if num_families == 1: family_sample_data = list(project_samples.values())[0] family_guid = list(family_sample_data.keys())[0] - sample_type = family_sample_data[family_guid][0]['sample_type'] + sample_type = list(family_sample_data[family_guid].keys())[0] family_ht = self._read_table(f'families/{sample_type}/{family_guid}.ht', use_ssd_dir=True) family_ht = family_ht.transmute(family_entries=[family_ht.entries]) family_ht = family_ht.annotate_globals( @@ -415,6 +415,11 @@ def _merge_project_hts(project_hts, n_partitions, include_all_globals=False): def _filter_entries_table(self, ht, sample_data, inheritance_filter=None, quality_filter=None, **kwargs): ht = self._prefilter_entries_table(ht, **kwargs) + # Temporarily flatten sample data for each sample_type into one list of samples + for family_guid, samples_by_sample_type in sample_data.items(): + samples = [s for samples in samples_by_sample_type.values() for s in samples] + sample_data[family_guid] = samples + ht, sorted_family_sample_data = self._add_entry_sample_families(ht, sample_data) passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht, **kwargs) diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index 255182d4af..e2553f4dd7 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -56,15 +56,15 @@ 'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', } ALL_AFFECTED_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_5_SAMPLE) -FAMILY_11_SAMPLE = { +FAMILY_11_SAMPLE_WES = { 'sample_id': 'NA20885', 'individual_guid': 'I000015_na20885', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_type': 'WES', } 
MULTI_PROJECT_SAMPLE_DATA = deepcopy(FAMILY_2_VARIANT_SAMPLE_DATA) -MULTI_PROJECT_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_11_SAMPLE) +MULTI_PROJECT_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_11_SAMPLE_WES) MULTI_PROJECT_MISSING_SAMPLE_DATA = deepcopy(FAMILY_2_MISSING_SAMPLE_DATA) -MULTI_PROJECT_MISSING_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_11_SAMPLE) +MULTI_PROJECT_MISSING_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_11_SAMPLE_WES) -SV_WGS_SAMPLE_DATA_WITH_SEX = {'SV_WGS': [{'sex': 'M', **FAMILY_11_SAMPLE}, { +SV_WGS_SAMPLE_DATA_WITH_SEX = {'SV_WGS': [{'sex': 'M', **FAMILY_11_SAMPLE_WES, 'sample_type': 'WGS'}, { 'sample_id': 'NA20884', 'individual_guid': 'I000025_na20884', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sample_type': 'WGS', 'sex': 'M', }, { 'sample_id': 'NA20883', 'individual_guid': 'I000035_na20883', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sample_type': 'WGS', 'sex': 'F', diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 17824d8114..5666046029 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) -GS_PATH_TEMPLATE = 'gs://seqr-hail-search-data/v03/{path}/runs/{version}/' +GS_PATH_TEMPLATE = 'gs://seqr-hail-search-data/v3.1/{path}/runs/{version}/' DATASET_TYPE_MAP = {'GCNV': Sample.DATASET_TYPE_SV_CALLS} USER_EMAIL = 'manage_command' MAX_LOOKUP_VARIANTS = 5000 diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index e0b891bcdb..af2423c818 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -83,8 +83,8 @@ def _test_success(self, path, metadata, 
dataset_type, sample_guids, reload_calls call_command('check_for_new_samples_from_pipeline', path, 'auto__2023-08-08') self.mock_subprocess.assert_has_calls([mock.call(command, stdout=-1, stderr=-2, shell=True) for command in [ - f'gsutil ls gs://seqr-hail-search-data/v03/{path}/runs/auto__2023-08-08/_SUCCESS', - f'gsutil cat gs://seqr-hail-search-data/v03/{path}/runs/auto__2023-08-08/metadata.json', + f'gsutil ls gs://seqr-hail-search-data/v3.1/{path}/runs/auto__2023-08-08/_SUCCESS', + f'gsutil cat gs://seqr-hail-search-data/v3.1/{path}/runs/auto__2023-08-08/metadata.json', ]], any_order=True) self.mock_logger.info.assert_has_calls([ From ddac649e37606fa2ac71d83cdf93a6e27d656cc8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 19:33:40 +0000 Subject: [PATCH 553/736] Bump django from 4.2.14 to 4.2.15 Bumps [django](https://github.com/django/django) from 4.2.14 to 4.2.15. - [Commits](https://github.com/django/django/compare/4.2.14...4.2.15) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements-dev.txt | 3 +-- requirements.txt | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 4a612073b8..990d45182f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -22,7 +22,7 @@ click==8.1.3 # via pip-tools coverage==5.1 # via -r requirements-dev.in -django==4.2.14 +django==4.2.15 # via # -c requirements.txt # django-appconf @@ -83,4 +83,3 @@ wheel==0.38.4 # The following packages are considered to be unsafe in a requirements file: # pip # setuptools -zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/requirements.txt b/requirements.txt index 0dae0ae7bb..a3175ce89a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ defusedxml==0.7.1 # via # python3-openid # social-auth-core -django==4.2.14 +django==4.2.15 # via # -r requirements.in # django-anymail @@ -182,4 +182,3 @@ urllib3==1.26.19 # requests whitenoise==6.3.0 # via -r requirements.in -zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability From e2ac78c6b10b322140b3dda8d0b1185ad1a07a3b Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 7 Aug 2024 15:41:42 -0400 Subject: [PATCH 554/736] add project and family tables for R0003_test WES, handle lookup sample data --- .../families/WES/F000011_11.ht/.README.txt.crc | Bin 0 -> 12 bytes .../families/WES/F000011_11.ht/._SUCCESS.crc | Bin 0 -> 8 bytes .../WES/F000011_11.ht/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../SV_WES/families/WES/F000011_11.ht/README.txt | 3 +++ .../SV_WES/families/WES/F000011_11.ht/_SUCCESS | 0 .../F000011_11.ht/globals/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../WES/F000011_11.ht/globals/metadata.json.gz | Bin 0 -> 295 bytes .../WES/F000011_11.ht/globals/parts/.part-0.crc | Bin 0 -> 12 bytes .../WES/F000011_11.ht/globals/parts/part-0 | Bin 0 -> 460 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes 
.../index | Bin 0 -> 169 bytes .../metadata.json.gz | Bin 0 -> 162 bytes .../families/WES/F000011_11.ht/metadata.json.gz | Bin 0 -> 337 bytes .../WES/F000011_11.ht/rows/.metadata.json.gz.crc | Bin 0 -> 16 bytes .../WES/F000011_11.ht/rows/metadata.json.gz | Bin 0 -> 639 bytes ...8-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc | Bin 0 -> 12 bytes ...0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 | Bin 0 -> 192 bytes .../projects/WES/R0003_test.ht/.README.txt.crc | Bin 0 -> 12 bytes .../projects/WES/R0003_test.ht/._SUCCESS.crc | Bin 0 -> 8 bytes .../WES/R0003_test.ht/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../SV_WES/projects/WES/R0003_test.ht/README.txt | 3 +++ .../SV_WES/projects/WES/R0003_test.ht/_SUCCESS | 0 .../R0003_test.ht/globals/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../WES/R0003_test.ht/globals/metadata.json.gz | Bin 0 -> 311 bytes .../WES/R0003_test.ht/globals/parts/.part-0.crc | Bin 0 -> 12 bytes .../WES/R0003_test.ht/globals/parts/part-0 | Bin 0 -> 191 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 183 bytes .../metadata.json.gz | Bin 0 -> 162 bytes .../projects/WES/R0003_test.ht/metadata.json.gz | Bin 0 -> 375 bytes .../WES/R0003_test.ht/rows/.metadata.json.gz.crc | Bin 0 -> 16 bytes .../WES/R0003_test.ht/rows/metadata.json.gz | Bin 0 -> 644 bytes ...art-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc | Bin 0 -> 12 bytes .../part-0-cbf84037-3354-427a-98a6-b953711ae5bc | Bin 0 -> 200 bytes hail_search/queries/base.py | 5 +++-- hail_search/test_search.py | 4 ++-- 38 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.README.txt.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/._SUCCESS.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/README.txt 
create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/_SUCCESS create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/.part-0.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/part-0 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/index create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/parts/.part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/parts/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.README.txt.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/._SUCCESS.crc create mode 100644 
hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/README.txt create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/_SUCCESS create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/parts/.part-0.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/parts/part-0 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/parts/part-0-cbf84037-3354-427a-98a6-b953711ae5bc diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.README.txt.crc 
b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.README.txt.crc new file mode 100644 index 0000000000000000000000000000000000000000..a4400373154743338e43096341ff91f638053b5d GIT binary patch literal 12 TcmYc;N@ieSU}CtlVo@ys6sZH= literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..2817ee0840e52c9da3e3b53b13e797aa07a16f40 GIT binary patch literal 12 TcmYc;N@ieSU}Dfa{?7ye61@Xr literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/README.txt new file mode 100644 index 0000000000..cd4126895e --- /dev/null +++ b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.109-b71b065e4bb6 + Created at 2023/08/23 14:16:34 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/_SUCCESS new file mode 100644 index 0000000000..e69de29bb2 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..18ad5aa5ab9630781295c79a0032a005a78bc93d GIT binary patch literal 12 TcmYc;N@ieSU}Bgb8qN#=5v&4< literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..19dd3d6727d00995e9771f45114424f2530c21eb GIT binary patch literal 295 zcmV+?0oeW@iwFP!000000F{wZPlGTNh5t)mhlx(33vV(pJ}faA#s`;$bnOM}N+E4A zLBfCcUM8DNUgW8q@0@!mw`GPGz(V$HC0LN_*N3D6F=qiw_t3C#v&EArB?!`4*baf0 zrUJP<0(1dMNM>=w9VFh?!nYYOly1_zR?>yD_}*IHY|x<$x6L+iyr?v!)3B~2_b@>^ zs{8z@n<_f^ktmaoO2%L)NWc$TKYCYZ8Q8N6uj1%xzKBjJVB%eA`O6aNQtWZc$h0Fd zQD11dp5NUr{>D~sH&4qx`_CYLGw|X(+0lcysJQiyN`u2KGJ3hVWVFqXH8(j#Wo;y( tmN}0Q$6^SKJRFn}x|X=b9^o&BqZX5gl#Zrx@2#Uu_YKj;f~!md002<7jDi3F literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/.part-0.crc new file mode 100644 index 0000000000000000000000000000000000000000..14b34f102c45175de6be75fe39cabd9ef2743363 GIT binary patch literal 12 TcmYc;N@ieSU}A`4$}<4~5Do%8 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/part-0 
b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/part-0 new file mode 100644 index 0000000000000000000000000000000000000000..a6fab3a930518526a8d53a2dd30f78591ec5a790 GIT binary patch literal 460 zcmWNM&2H0B5QWEN(MrTpoa~Syp}-bHT4*KLJT)W z9S}_dW2u!-AQwy%=!*o75yi^{emakV&~&?SA%hU4V)$*4Av$SeI2Qqpe+8us=t;*= z5<4o-f>VukEI8B)!@GtB&kGv_O231*&g_7+;R1b`07|X7iYKX_ft*o{^$7arFYLUx zIyfnNT^QID-dH(=FD`gR+^pyC0j7`>dOM8B3O4Ng04`+==nw|#5cV@2{fMsGqbzHyZ_T?^u`Swa+h%ttv*|ovlgs9l zY4|i!Uf&=iz5vGjXvH5Uk*%GVAOai0`|5qFv{(~}^;S06FGKTL1t6 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..e4b22f3bb490f95c55551b2f0b56ed6a65b01250 GIT binary patch literal 337 zcmV-X0j~ZZiwFP!000000F9ARPr@)1h5t*RD;bCo`=&D?z8Dqa3t5)Ca)HLKZMqJ! zEdB3Z#wMsQzIEq(=j`^jWm}~&KcKIaHB8Lr$$Y}u9+bX_EE!EkgZqcKWSmhpd?jY<5zNmx*D5yR(4#`=aKH8nO1!%DhzLufgLaB6#Pm~UHnd<;YLkGO7K7r|r zcsAj7!lgCR`VG)Eh7#xTkej;9L5H5|BneuJM{6~L;RcUCgvb|fJ-k?nl$@CH%ar0g zO^mcEX-j7fja4zxvh$X@qk;0Yz4wo&bU>C4W%mHzTOasHsy+Tc*TEEW;DZYE+_|)M jKSN*eI#_Av{4N{#^)!oA)Q7sa?uhsWG&;e2WC8#HWJjou literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..01d66fc81577fb38feb4079fdb5d7e5075fe702f GIT binary patch literal 16 XcmYc;N@ieSU}AV_Wbi6bZSocXBvb`1 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fd69440433b39f134f72464df9df971618240a8 GIT binary patch literal 639 
zcmV-_0)YJ=iwFP!000000NqtxZ`v>v{V#rPU`hBW%iEw?hlJ2psyz%L%e8Mx*5E{r z!^%+p`>w-B$Olrp$AX05+;gwbu}|=xNVWmuk*O5$3gY4C-68?*5{u9m*b#3@woHXg zY8nc%LX^h>xZx0Qe`y?c1x^<2>&9X=iUxS&9>RE{H%*_j3geILe6Vnq`gjGTGo#{2=-R`G?ZcyB;pvdK$!dlI$O3hxrV$yJ@ zhkXklhbsxiN;Quy`LW_4NI{>cF)3rsbQsHOYo4##vv!hmA*K>CBci$dF$t&D8*A5A z{U~X{=Tfq0j)(kzr(dk6K(?QE5ig35KhWB#HdAvz-jt97Re|}7XdnE}(41XVg3?r9 z*IrkCJyva!uL%B-NyFW#Zh8BkaJdatc23NYgem0O3u<8M3~i$gq-cyt))+jO-ubfwb?6QZHIzKc1SYar7X85) Z^UV~4nI`4#*E-SE+Sc&yyb15bjeF4O4sM_&b&hYNBhx6&pJWe(=*lP;-P=70W0LVT zOkNpn4<~BEC*k$aFs2N?daZWdz-PkEED9f2n-<5*7(X_kfvf0(0yIGKG9FNztT%cP f%hkRIa>((V1Vsy)gQ{La%_w06Zq9Ke&vx+#6P6~H literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.README.txt.crc new file mode 100644 index 0000000000000000000000000000000000000000..a3bb76f1307c1fd446eb806c189c3219d0910f6d GIT binary patch literal 12 TcmYc;N@ieSU}7-)8>I*U5+nlY literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..dfa05f538157270698fa8e925e319c00b2cfbe24 GIT binary patch literal 12 TcmYc;N@ieSU}DJ9*IEw%5V!)Z literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/README.txt new file mode 100644 index 0000000000..c7d1359830 --- /dev/null +++ 
b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.126-ee77707f4fab + Created at 2024/02/05 17:34:17 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/_SUCCESS new file mode 100644 index 0000000000..e69de29bb2 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..79ea0b74d6e0dad1fda877b28442060d1feb892f GIT binary patch literal 12 TcmYc;N@ieSU}CU*`1Kb66YB&- literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..c0e3ea8a8374d355a4419a5c6e78527167606d00 GIT binary patch literal 311 zcmV-70m%LziwFP!000000F_crPs1P>{x4it6P?*&xLH?ECMK)#VACuN-c}r?4TUAO z>3{cST{nv-Il=SA!$(gvtAJ4QY6KV%+xJb{0AG@b><-Won&mj-wE*sMLi@?^)-@oW zE(EH;L&z+Wn6;2vPak#$R)x;N4?IM}jD41PT6}-v z7dN-W|AVhB(_@s(N~NP)7ZSW1c}~wHn0DXv+LUJ#u`ESYg3dh zBqXNrPLTInm?*=p$KK|^dBh^QMZoE(Lm|Vp)i1s&Xc`9gw(pAExBS}cvn$TN*d6~@ zTxgQqWu?CRkK@j>1lc~@7$0R`knQDn=I!5{rIEI~J2aJ#_bgZ``>bl&3h^DL=lX4{ qRbx&*&STyd(@@OF%An9am8Y??cj2LpT+1YRfi49Buxl9@85jV@u1+KX literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc new file mode 100644 index 
0000000000000000000000000000000000000000..c196068cba2e66a1fddf969adfc94b8eeb0789b3 GIT binary patch literal 12 TcmYc;N@ieSU}9MN=2k8M6&VB@ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..41cee7d3f8b97bb44926145da125f70696fb803b GIT binary patch literal 12 TcmYc;N@ieSU}8vjFrEzn5ZwZ> literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..c7ce1f380c5c8ab6571026bd9fd73dfd6571973d GIT binary patch literal 183 zcmXS5U|=W*VvVi(e-+AQnHfY_85md?w3G8Q@{3C1jVuif^^!A+4E547^AdC7lXFrN z^GXZiJ^g~?;|_w!U=~cthB&4ZQ zO8@trU8~fmy*1zczVC9EHY`^f(-ZnqS;NF4o<&cb9YN`v$flEM@)#{15aT$W#`!#l zJ>$${jvkG~GWDoa2jNKYO(i72p5jC8kSd$Yp*d)~2QAjYS28q}%OGGW))=-VDRbEI z915lD^iZj+6wls6eP1e}aGG38B?lWHfxDLfU@f*M@Cr;wRkq{ao`Xp=!nBju5B(k$-8(pVHR;|jkG=k8hhyBJf2ch6)EV@Q=KG1Yw=>OMljsqc_g%a z_1VL#jYz1{(kv})Qrf~-4rTW~*GC!nmQ;KGzpsPI zbO1HYZ`E;-&hBpyczSF!husZ5nY85wl_hBanP(0;LP VDXsT+|5ayge*yXr;d#&k000EBxiA0# literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..59afd1815900ecc8e854c51d64b74abfbd3ec6c8 GIT binary patch literal 16 XcmYc;N@ieSU}A{e5D<{Gbn!j_BJTxE literal 0 HcmV?d00001 diff --git 
a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..49ebd019d137568e1fa2f3836c68170369f91424 GIT binary patch literal 644 zcmV-~0(<=*iwFP!000000NqtxkD5Rf{V(&nU{`Qr?;Q{n&}6rd>x0oV=YE`f56qDWwguvmnegEa{HLGyiyd&A7=*sUfp|-@XEJ0$ z(aIF5h`L%^Rpk{J&_O8&$1=~uQ|Wm@F^B(foI0cwd*C|+vH3aBZpkW+l0<+RmY z)6taCQgJDw(lEi3Qo%Y60+5YuK2FD$Jsh8p48k{I2jfsYux&Fk(;pNOYU?&M%t|bGvV`b6QyauPoi90Eqd1x%H|@KeTsN{z z^iJQLpzK%bVL}3Ok;NnX62#+_h zRMZ|>R)=q-)d|pZ5#{>kNOyELUr-HG%kAo(K=`c`yAAryruWmwg6i6RO$`N)QXh?& emqlGrVN;z&Bbn}$DJ8K*G{8UAr^)M(2mk<_v^>55 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc new file mode 100644 index 0000000000000000000000000000000000000000..2e373bfe9be3af3373c2e0917fc127efb4e786ac GIT binary patch literal 12 TcmYc;N@ieSU}DIB$7~1y5B0REp+o(?dImroG2P4(|tZN>NUN1~*a8VieHer)5h9h~5|04Lwm3acBuvhtdPI0Obgd)nI9ljk_3CLa|)8*p*>|`vDIC zU_|AvRm(^aVGg2L(FNLY%1g>3SR@F+XF)LtoDxYjm{1J>00000001bpFa00@0RR9z C Date: Wed, 7 Aug 2024 16:04:37 -0400 Subject: [PATCH 555/736] add WES test fixture table --- .../projects/WES/R0003_test.ht/.README.txt.crc | Bin 0 -> 12 bytes .../projects/WES/R0003_test.ht}/._SUCCESS.crc | Bin .../WES/R0003_test.ht/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../projects/WES/R0003_test.ht/README.txt | 3 +++ .../projects/WES/R0003_test.ht}/_SUCCESS | 0 .../R0003_test.ht/globals/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../WES/R0003_test.ht/globals/metadata.json.gz | Bin 0 -> 324 bytes .../WES/R0003_test.ht/globals/parts/.part-0.crc | Bin 0 -> 12 bytes .../WES/R0003_test.ht/globals/parts/part-0 | Bin 0 -> 179 bytes .../.index.crc | Bin 0 -> 12 bytes 
.../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 104 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../projects/WES/R0003_test.ht/metadata.json.gz | Bin 0 -> 370 bytes .../WES/R0003_test.ht/rows/.metadata.json.gz.crc | Bin 0 -> 16 bytes .../WES/R0003_test.ht/rows/metadata.json.gz | Bin 0 -> 641 bytes ...art-0-a35eed71-c848-4567-8937-364bcaecaf47.crc | Bin 0 -> 12 bytes .../part-0-a35eed71-c848-4567-8937-364bcaecaf47 | Bin 0 -> 107 bytes .../families/WES/F000011_11.ht/.README.txt.crc | Bin 12 -> 0 bytes .../WES/F000011_11.ht/.metadata.json.gz.crc | Bin 12 -> 0 bytes .../SV_WES/families/WES/F000011_11.ht/README.txt | 3 --- .../F000011_11.ht/globals/.metadata.json.gz.crc | Bin 12 -> 0 bytes .../WES/F000011_11.ht/globals/metadata.json.gz | Bin 295 -> 0 bytes .../WES/F000011_11.ht/globals/parts/.part-0.crc | Bin 12 -> 0 bytes .../WES/F000011_11.ht/globals/parts/part-0 | Bin 460 -> 0 bytes .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 169 -> 0 bytes .../metadata.json.gz | Bin 162 -> 0 bytes .../families/WES/F000011_11.ht/metadata.json.gz | Bin 337 -> 0 bytes .../WES/F000011_11.ht/rows/.metadata.json.gz.crc | Bin 16 -> 0 bytes .../WES/F000011_11.ht/rows/metadata.json.gz | Bin 639 -> 0 bytes ...8-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc | Bin 12 -> 0 bytes ...0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 | Bin 192 -> 0 bytes .../projects/WES/R0003_test.ht/.README.txt.crc | Bin 12 -> 0 bytes .../projects/WES/R0003_test.ht/._SUCCESS.crc | Bin 8 -> 0 bytes .../WES/R0003_test.ht/.metadata.json.gz.crc | Bin 12 -> 0 bytes .../SV_WES/projects/WES/R0003_test.ht/README.txt | 3 --- .../SV_WES/projects/WES/R0003_test.ht/_SUCCESS | 0 .../R0003_test.ht/globals/.metadata.json.gz.crc | Bin 12 -> 0 bytes .../WES/R0003_test.ht/globals/metadata.json.gz | Bin 311 -> 0 bytes .../WES/R0003_test.ht/globals/parts/.part-0.crc | Bin 12 -> 0 bytes .../WES/R0003_test.ht/globals/parts/part-0 | Bin 191 -> 0 bytes 
.../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 183 -> 0 bytes .../metadata.json.gz | Bin 162 -> 0 bytes .../projects/WES/R0003_test.ht/metadata.json.gz | Bin 375 -> 0 bytes .../WES/R0003_test.ht/rows/.metadata.json.gz.crc | Bin 16 -> 0 bytes .../WES/R0003_test.ht/rows/metadata.json.gz | Bin 644 -> 0 bytes ...art-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc | Bin 12 -> 0 bytes .../part-0-cbf84037-3354-427a-98a6-b953711ae5bc | Bin 200 -> 0 bytes 52 files changed, 3 insertions(+), 6 deletions(-) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/.README.txt.crc rename hail_search/fixtures/GRCh38/{SV_WES/families/WES/F000011_11.ht => SNV_INDEL/projects/WES/R0003_test.ht}/._SUCCESS.crc (100%) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/README.txt rename hail_search/fixtures/GRCh38/{SV_WES/families/WES/F000011_11.ht => SNV_INDEL/projects/WES/R0003_test.ht}/_SUCCESS (100%) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/parts/.part-0.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/parts/part-0 create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/index/part-0-a35eed71-c848-4567-8937-364bcaecaf47.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/index/part-0-a35eed71-c848-4567-8937-364bcaecaf47.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/index/part-0-a35eed71-c848-4567-8937-364bcaecaf47.idx/index 
create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/index/part-0-a35eed71-c848-4567-8937-364bcaecaf47.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/rows/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/rows/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/rows/parts/.part-0-a35eed71-c848-4567-8937-364bcaecaf47.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/rows/parts/part-0-a35eed71-c848-4567-8937-364bcaecaf47 delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.README.txt.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/README.txt delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/.part-0.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/part-0 delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/index delete mode 100644 
hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/index/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/parts/.part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/parts/part-0-278-0-0-7dd50455-5c6c-48a0-7033-11afafa5d003 delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.README.txt.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/._SUCCESS.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/README.txt delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/_SUCCESS delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/parts/.part-0.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/parts/part-0 delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc delete mode 100644 
hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/parts/part-0-cbf84037-3354-427a-98a6-b953711ae5bc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/.README.txt.crc new file mode 100644 index 0000000000000000000000000000000000000000..c98011f4e3ff63d8724013e8358026b0a4edec18 GIT binary patch literal 12 TcmYc;N@ieSU}7lO%=iuf5uO7& literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/._SUCCESS.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/._SUCCESS.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/._SUCCESS.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..288ebfdfcfc2c6cb560890e8a032d0d617063e8e GIT binary patch literal 12 TcmYc;N@ieSU}6xl)$IlV4{QQN literal 0 HcmV?d00001 diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/README.txt new file mode 100644 index 0000000000..0ad192a49b --- /dev/null +++ b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.128-eead8100a1c1 + Created at 2024/08/07 16:01:38 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/_SUCCESS similarity index 100% rename from hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/_SUCCESS rename to hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/_SUCCESS diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..acf12b18f0fe52a544cc6f65bdbbc7e722339f38 GIT binary patch literal 12 TcmYc;N@ieSU}88TyV?~15`qH^ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a9459e7d89b84db892bdd4fb907f3b19a5a2899a GIT binary patch literal 324 zcmV-K0lWSmiwFP!000000F_crPlGTR{x7`_6P?*&dJ~5y6O&;)NC@fL7pyCVly(Uc z{=09-V1g%mYM-w@{pg5hB@jwpjQ|5;|GtZB;A;|)>Od?Z4KgS zB2Wb$LS_|(tby1X$8G0o!7Ma%6+Ifm+5}0dnQ7CYVP&l#vuL6t8sw~$s?7=~#nm%e zF6`i(WnMSL)^toeG39_RQu?&_ZBLiBi=Hvq1R<%5dBXvl_ZxCY^`hKP@-Zox35V?u zF3-U%?(`)J@753Dl?n*|luG=PgsS34jG2iJ9*pHT3bvcg{r0czqp{*_{ihE*11p!# z-VZ!P&5V7Pcv}1?#4m1ciT^KOd!`ErE2o90I?`AqF&0gr#W0bYD<|;T&J?}^OjzVK W9_f7=v(_};)Bga@3U%hL0ssIragzlA literal 0 
HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/parts/.part-0.crc new file mode 100644 index 0000000000000000000000000000000000000000..ec695bda7b936238f5deb256986cf2fbf454eda2 GIT binary patch literal 12 TcmYc;N@ieSU}BiN< literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/globals/parts/part-0 new file mode 100644 index 0000000000000000000000000000000000000000..adfbcf32cc05d70ab4168ed1833857ff6e6ea88e GIT binary patch literal 179 zcmbQoz`(E-h&8tA|5e!9%fj%=;P>Sa~S3JIyeZM!8q zxAPn>w>>R(@{) literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/index/part-0-a35eed71-c848-4567-8937-364bcaecaf47.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/index/part-0-a35eed71-c848-4567-8937-364bcaecaf47.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..55e960e931bb88a950f33a3e8604a4529fbc0713 GIT binary patch literal 104 zcmcC#U|=u;VvVi(e-(@-G6I>*3@phRMTU#~nV21&of#b&ApGNjOiYeIt|Ekc_}FeC aCdLp(cc1_Rg9ywBk%bHlj386c%m4soJQa2T literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/index/part-0-a35eed71-c848-4567-8937-364bcaecaf47.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/index/part-0-a35eed71-c848-4567-8937-364bcaecaf47.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..505696c221554258212a8ddf2a204174be3577a1 GIT binary patch literal 185 zcmV;q07m~GiwFP!0000009B5`3c@fDME_+^3ON*8V$DqiJt!zDUc^IMw@ol4l5D|J z`tNSNd07T#=Isp7Scf-^$p#Nf%B&~_R8jldSl@1SO(B9RvXvBI(6)v^Ed;!)O18L%y|B%V^d+ZaaT!8gY n{G2oy4*WE`F$6j`O+ua_mrNq2RvjT%hH?A?EnNng&j0`b1YS+q literal 0 HcmV?d00001 diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/projects/WES/R0003_test.ht/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..9af33596ce4d961c44e1554796a05f1393749df9 GIT binary patch literal 370 zcmV-&0ge72iwFP!000000F9DSZ-Oush5t*Rwxlxxi!*(5n3-jJaBB8q2&v_&%?c%L znIVM#z6$~dpY}r9bH8(Xb1xM+N(J#R7%O!~*z6_n=aiheR5y`KC;r6q=T87UnS0YI z_k>3%F>nL}nE)FbP;t)P8R46Xh~o#0cV!M(*)^Byr1XI+wsocut}60`JFsjEZjX}8 zP7$>{%{v#nYV^ocsml0N$i%XRF}xh;u5>k7wv?hzq9ZV>=?|1_ue2yEGkool)0aACTdFFbuXPfAz)QGZ*-D@ygzm=5F>(m+MYj z?O2G9)!1i^gA+Qw~)*dsSHnZG(FT)<$<{;X23_Ixg8dZJyzpsVSlyl>x z!{x_97hMlsz`8GmZRI|!=l4O>vhX0GNHZ^n&34!JgC`Fa3sROi&RR|g91iS_Z zIc6$D{_i_Z$R|uHm3mVyAUQrC@9R8(4@A%n5T87V0A3-O{JfiPfwhT67B8?T{xgYU zwoCAyMiE4ikU1`~H~?l6iGRpD%=TLdhQ$JAfeGQt)o4Hym@2tr>X5ecs}U9p#aBLN zaY5zY+Qtc2JQf&v5Xxz5>p0pU^`)fy=R9+6g*;Lq&n?gmkM;`?N^&f`Iws^FmFeRj zem2JH5B$-66jSB7*4rcDpwymn_28JPIl)12pl1dTL2a6P)tD znBa9fj(1x2D^WpOdSM+p1G;HeYV$%9+>qGt@Q}w+FeOA!RIM?7ef=#>H_-@anBw;) zCaVa9TqbJ4gV&5W++ak%grY06^=_SR%kkVLqrhnBcGL61?+Fzv7{ogfnEC@r6m~GiveN}#WC-I_ zww5-^$q^>LePg?xE)O6$zdsyeN}u2}Cm@11HgbC1w(Hur^x>Idv b9*Wq=3^5UH>`iFVvVi(e-)B_nHWNt85vlTGl~os`7<#)Iy*BuGB5%m10Ta1dk&uCflN$} z0w3(zL=GR@EyTna!swp-z+Q%jft|sCMerEo%mX`|MSVkrq72x0fu@20*kq6i0NGp? 
A_y7O^ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.README.txt.crc deleted file mode 100644 index a4400373154743338e43096341ff91f638053b5d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}CtlVo@ys6sZH= diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/.metadata.json.gz.crc deleted file mode 100644 index 2817ee0840e52c9da3e3b53b13e797aa07a16f40..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Dfa{?7ye61@Xr diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/README.txt deleted file mode 100644 index cd4126895e..0000000000 --- a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/README.txt +++ /dev/null @@ -1,3 +0,0 @@ -This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
- Written with version 0.2.109-b71b065e4bb6 - Created at 2023/08/23 14:16:34 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/.metadata.json.gz.crc deleted file mode 100644 index 18ad5aa5ab9630781295c79a0032a005a78bc93d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Bgb8qN#=5v&4< diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/metadata.json.gz deleted file mode 100644 index 19dd3d6727d00995e9771f45114424f2530c21eb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 295 zcmV+?0oeW@iwFP!000000F{wZPlGTNh5t)mhlx(33vV(pJ}faA#s`;$bnOM}N+E4A zLBfCcUM8DNUgW8q@0@!mw`GPGz(V$HC0LN_*N3D6F=qiw_t3C#v&EArB?!`4*baf0 zrUJP<0(1dMNM>=w9VFh?!nYYOly1_zR?>yD_}*IHY|x<$x6L+iyr?v!)3B~2_b@>^ zs{8z@n<_f^ktmaoO2%L)NWc$TKYCYZ8Q8N6uj1%xzKBjJVB%eA`O6aNQtWZc$h0Fd zQD11dp5NUr{>D~sH&4qx`_CYLGw|X(+0lcysJQiyN`u2KGJ3hVWVFqXH8(j#Wo;y( tmN}0Q$6^SKJRFn}x|X=b9^o&BqZX5gl#Zrx@2#Uu_YKj;f~!md002<7jDi3F diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/.part-0.crc deleted file mode 100644 index 14b34f102c45175de6be75fe39cabd9ef2743363..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}A`4$}<4~5Do%8 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/globals/parts/part-0 deleted file mode 100644 index a6fab3a930518526a8d53a2dd30f78591ec5a790..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 460 
zcmWNM&2H0B5QWEN(MrTpoa~Syp}-bHT4*KLJT)W z9S}_dW2u!-AQwy%=!*o75yi^{emakV&~&?SA%hU4V)$*4Av$SeI2Qqpe+8us=t;*= z5<4o-f>VukEI8B)!@GtB&kGv_O231*&g_7+;R1b`07|X7iYKX_ft*o{^$7arFYLUx zIyfnNT^QID-dH(=FD`gR+^pyC0j7`>dOM8B3O4Ng04`+==nw|#5cV@2{fMsGqbzHyZ_T?^u`Swa+h%ttv*|ovlgs9l zY4|i!Uf&=iz5vGjXvH5Uk*%GVAOai0`|5qFv{(~}^;S06FGKTL1t6 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/metadata.json.gz deleted file mode 100644 index e4b22f3bb490f95c55551b2f0b56ed6a65b01250..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 337 zcmV-X0j~ZZiwFP!000000F9ARPr@)1h5t*RD;bCo`=&D?z8Dqa3t5)Ca)HLKZMqJ! zEdB3Z#wMsQzIEq(=j`^jWm}~&KcKIaHB8Lr$$Y}u9+bX_EE!EkgZqcKWSmhpd?jY<5zNmx*D5yR(4#`=aKH8nO1!%DhzLufgLaB6#Pm~UHnd<;YLkGO7K7r|r zcsAj7!lgCR`VG)Eh7#xTkej;9L5H5|BneuJM{6~L;RcUCgvb|fJ-k?nl$@CH%ar0g zO^mcEX-j7fja4zxvh$X@qk;0Yz4wo&bU>C4W%mHzTOasHsy+Tc*TEEW;DZYE+_|)M jKSN*eI#_Av{4N{#^)!oA)Q7sa?uhsWG&;e2WC8#HWJjou diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/.metadata.json.gz.crc deleted file mode 100644 index 01d66fc81577fb38feb4079fdb5d7e5075fe702f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}AV_Wbi6bZSocXBvb`1 diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/WES/F000011_11.ht/rows/metadata.json.gz deleted file mode 100644 index 1fd69440433b39f134f72464df9df971618240a8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 639 zcmV-_0)YJ=iwFP!000000NqtxZ`v>v{V#rPU`hBW%iEw?hlJ2psyz%L%e8Mx*5E{r z!^%+p`>w-B$Olrp$AX05+;gwbu}|=xNVWmuk*O5$3gY4C-68?*5{u9m*b#3@woHXg zY8nc%LX^h>xZx0Qe`y?c1x^<2>&9X=iUxS&9>RE{H%*_j3geILe6Vnq`gjGTGo#{2=-R`G?ZcyB;pvdK$!dlI$O3hxrV$yJ@ 
zhkXklhbsxiN;Quy`LW_4NI{>cF)3rsbQsHOYo4##vv!hmA*K>CBci$dF$t&D8*A5A z{U~X{=Tfq0j)(kzr(dk6K(?QE5ig35KhWB#HdAvz-jt97Re|}7XdnE}(41XVg3?r9 z*IrkCJyva!uL%B-NyFW#Zh8BkaJdatc23NYgem0O3u<8M3~i$gq-cyt))+jO-ubfwb?6QZHIzKc1SYar7X85) Z^UV~4nI`4#*E-SE+Sc&yyb15bjeF4O4sM_&b&hYNBhx6&pJWe(=*lP;-P=70W0LVT zOkNpn4<~BEC*k$aFs2N?daZWdz-PkEED9f2n-<5*7(X_kfvf0(0yIGKG9FNztT%cP f%hkRIa>((V1Vsy)gQ{La%_w06Zq9Ke&vx+#6P6~H diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.README.txt.crc deleted file mode 100644 index a3bb76f1307c1fd446eb806c189c3219d0910f6d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}7-)8>I*U5+nlY diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/._SUCCESS.crc deleted file mode 100644 index 3b7b044936a890cd8d651d349a752d819d71d22c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 PcmYc;N@ieSU}69O2$TUk diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/.metadata.json.gz.crc deleted file mode 100644 index dfa05f538157270698fa8e925e319c00b2cfbe24..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DJ9*IEw%5V!)Z diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/README.txt deleted file mode 100644 index c7d1359830..0000000000 --- a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/README.txt +++ /dev/null @@ -1,3 +0,0 @@ -This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
- Written with version 0.2.126-ee77707f4fab - Created at 2024/02/05 17:34:17 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/_SUCCESS deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/.metadata.json.gz.crc deleted file mode 100644 index 79ea0b74d6e0dad1fda877b28442060d1feb892f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}CU*`1Kb66YB&- diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/globals/metadata.json.gz deleted file mode 100644 index c0e3ea8a8374d355a4419a5c6e78527167606d00..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 311 zcmV-70m%LziwFP!000000F_crPs1P>{x4it6P?*&xLH?ECMK)#VACuN-c}r?4TUAO z>3{cST{nv-Il=SA!$(gvtAJ4QY6KV%+xJb{0AG@b><-Won&mj-wE*sMLi@?^)-@oW zE(EH;L&z+Wn6;2vPak#$R)x;N4?IM}jD41PT6}-v z7dN-W|AVhB(_@s(N~NP)7ZSW1c}~wHn0DXv+LUJ#u`ESYg3dh zBqXNrPLTInm?*=p$KK|^dBh^QMZoE(Lm|Vp)i1s&Xc`9gw(pAExBS}cvn$TN*d6~@ zTxgQqWu?CRkK@j>1lc~@7$0R`knQDn=I!5{rIEI~J2aJ#_bgZ``>bl&3h^DL=lX4{ qRbx&*&STyd(@@OF%An9am8Y??cj2LpT+1YRfi49Buxl9@85jV@u1+KX diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.index.crc deleted file mode 100644 index c196068cba2e66a1fddf969adfc94b8eeb0789b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}9MN=2k8M6&VB@ diff --git 
a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/.metadata.json.gz.crc deleted file mode 100644 index 41cee7d3f8b97bb44926145da125f70696fb803b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}8vjFrEzn5ZwZ> diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/index/part-0-cbf84037-3354-427a-98a6-b953711ae5bc.idx/index deleted file mode 100644 index c7ce1f380c5c8ab6571026bd9fd73dfd6571973d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 183 zcmXS5U|=W*VvVi(e-+AQnHfY_85md?w3G8Q@{3C1jVuif^^!A+4E547^AdC7lXFrN z^GXZiJ^g~?;|_w!U=~cthB&4ZQ zO8@trU8~fmy*1zczVC9EHY`^f(-ZnqS;NF4o<&cb9YN`v$flEM@)#{15aT$W#`!#l zJ>$${jvkG~GWDoa2jNKYO(i72p5jC8kSd$Yp*d)~2QAjYS28q}%OGGW))=-VDRbEI z915lD^iZj+6wls6eP1e}aGG38B?lWHfxDLfU@f*M@Cr;wRkq{ao`Xp=!nBju5B(k$-8(pVHR;|jkG=k8hhyBJf2ch6)EV@Q=KG1Yw=>OMljsqc_g%a z_1VL#jYz1{(kv})Qrf~-4rTW~*GC!nmQ;KGzpsPI zbO1HYZ`E;-&hBpyczSF!husZ5nY85wl_hBanP(0;LP VDXsT+|5ayge*yXr;d#&k000EBxiA0# diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/.metadata.json.gz.crc deleted file mode 100644 index 59afd1815900ecc8e854c51d64b74abfbd3ec6c8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}A{e5D<{Gbn!j_BJTxE diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/metadata.json.gz deleted file mode 100644 index 
49ebd019d137568e1fa2f3836c68170369f91424..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 644 zcmV-~0(<=*iwFP!000000NqtxkD5Rf{V(&nU{`Qr?;Q{n&}6rd>x0oV=YE`f56qDWwguvmnegEa{HLGyiyd&A7=*sUfp|-@XEJ0$ z(aIF5h`L%^Rpk{J&_O8&$1=~uQ|Wm@F^B(foI0cwd*C|+vH3aBZpkW+l0<+RmY z)6taCQgJDw(lEi3Qo%Y60+5YuK2FD$Jsh8p48k{I2jfsYux&Fk(;pNOYU?&M%t|bGvV`b6QyauPoi90Eqd1x%H|@KeTsN{z z^iJQLpzK%bVL}3Ok;NnX62#+_h zRMZ|>R)=q-)d|pZ5#{>kNOyELUr-HG%kAo(K=`c`yAAryruWmwg6i6RO$`N)QXh?& emqlGrVN;z&Bbn}$DJ8K*G{8UAr^)M(2mk<_v^>55 diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/WES/R0003_test.ht/rows/parts/.part-0-cbf84037-3354-427a-98a6-b953711ae5bc.crc deleted file mode 100644 index 2e373bfe9be3af3373c2e0917fc127efb4e786ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DIB$7~1y5B0REp+o(?dImroG2P4(|tZN>NUN1~*a8VieHer)5h9h~5|04Lwm3acBuvhtdPI0Obgd)nI9ljk_3CLa|)8*p*>|`vDIC zU_|AvRm(^aVGg2L(FNLY%1g>3SR@F+XF)LtoDxYjm{1J>00000001bpFa00@0RR9z C Date: Wed, 7 Aug 2024 16:43:37 -0400 Subject: [PATCH 556/736] move hailtop to lookup only --- hail_search/queries/base.py | 32 +++++++++----------------------- hail_search/queries/mito.py | 12 +++++++++++- hail_search/test_search.py | 2 +- hail_search/test_utils.py | 2 +- 4 files changed, 22 insertions(+), 26 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index abe6cb582c..5fa5d0fddd 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -1,7 +1,6 @@ from aiohttp.web import HTTPBadRequest, HTTPNotFound from collections import defaultdict, namedtuple import hail as hl -import hailtop.fs as hfs import logging import os @@ -267,9 +266,6 @@ def _get_table_path(cls, path, use_ssd_dir=False): return f'{SSD_DATASETS_DIR if use_ssd_dir else 
DATASETS_DIR}/{cls.GENOME_VERSION}/{cls.DATA_TYPE}/{path}' def _read_table(self, path, drop_globals=None, use_ssd_dir=False, skip_missing_field=None): - if not hfs.exists(self._get_table_path(path, use_ssd_dir=use_ssd_dir)): - return None - table_path = self._get_table_path(path, use_ssd_dir=use_ssd_dir) if 'variant_ht' in self._load_table_kwargs: ht = self._query_table_annotations(self._load_table_kwargs['variant_ht'], table_path) @@ -301,18 +297,12 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ if len(project_samples) == 1: project_guid = list(project_samples.keys())[0] # for variant lookup, project_samples looks like - # {: {: True, : True}, : ...} + # {: {: {: True}, {: {: True}}, : ...} # for variant search, project_samples looks like # {: {: {: [, , ...], : ...}, : ...}, : ...} first_family_samples = list(project_samples[project_guid].values())[0] - if isinstance(first_family_samples, bool): - project_ht = ( - self._read_table(f'projects/WES/{project_guid}.ht', use_ssd_dir=True) or - self._read_table(f'projects/WGS/{project_guid}.ht', use_ssd_dir=True) - ) - else: - sample_type = list(first_family_samples.keys())[0] - project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) + sample_type = list(first_family_samples.keys())[0] + project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) return self._filter_entries_table(project_ht, project_samples[project_guid], **kwargs) # Need to chunk tables or else evaluating table globals throws LineTooLong exception @@ -325,14 +315,8 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ sample_data = {} for project_guid, project_sample_data in project_samples.items(): first_family_samples = list(project_sample_data.values())[0] - if isinstance(first_family_samples, bool): - project_ht = ( - self._read_table(f'projects/WES/{project_guid}.ht', use_ssd_dir=True) or - 
self._read_table(f'projects/WGS/{project_guid}.ht', use_ssd_dir=True) - ) - else: - sample_type = list(first_family_samples.keys())[0] - project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) + sample_type = list(first_family_samples.keys())[0] + project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) if project_ht is None: continue @@ -415,11 +399,13 @@ def _merge_project_hts(project_hts, n_partitions, include_all_globals=False): def _filter_entries_table(self, ht, sample_data, inheritance_filter=None, quality_filter=None, **kwargs): ht = self._prefilter_entries_table(ht, **kwargs) - # Temporarily flatten sample data for each sample_type into one list of samples + # Temporarily reset sample_data for family_guid, samples_by_sample_type in sample_data.items(): - if isinstance(samples_by_sample_type, dict): + if isinstance(list(samples_by_sample_type.values())[0], list): samples = [s for samples in samples_by_sample_type.values() for s in samples] sample_data[family_guid] = samples + else: + sample_data[family_guid] = True ht, sorted_family_sample_data = self._add_entry_sample_families(ht, sample_data) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 17f5d65c57..5be79c829b 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -1,5 +1,6 @@ from aiohttp.web import HTTPNotFound import hail as hl +import hailtop.fs as hfs import logging from hail_search.constants import ABSENT_PATH_SORT_OFFSET, CLINVAR_KEY, CLINVAR_MITO_KEY, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_FILTER, \ @@ -308,7 +309,7 @@ def _gene_rank_sort(cls, r, gene_ranks): def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): # Get all the project-families for the looked up variant formatted as a dict of dicts: - # {: {: True, : True}, : ...} + # {: {: {: True}, {: {: True}}, : ...} lookup_ht = self._read_table('lookup.ht', use_ssd_dir=True, 
skip_missing_field='project_stats') if lookup_ht is None: raise HTTPNotFound() @@ -325,6 +326,15 @@ def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): hl.dict(family_indices.map(lambda j: (lookup_ht.project_families[project_guid][j], True))), ))), 1), )[0] + + for project_guid, families in variant_projects.items(): + if hfs.exists(self._get_table_path(f'projects/WES/{project_guid}.ht', use_ssd_dir=True)): + sample_type = 'WES' + else: + sample_type = 'WGS' + for family_guid in families: + families[family_guid] = {sample_type: True} + # Variant can be present in the lookup table with only ref calls, so is still not present in any projects if not variant_projects: raise HTTPNotFound() diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 0d1cf69878..9e0f64033e 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -28,7 +28,7 @@ 'familyGuids': ['F000011_11'], 'genotypes': { 'I000015_na20885': { - 'sampleId': 'NA20885', 'sampleType': 'WGS', 'individualGuid': 'I000015_na20885', 'familyGuid': 'F000011_11', + 'sampleId': 'NA20885', 'sampleType': 'WES', 'individualGuid': 'I000015_na20885', 'familyGuid': 'F000011_11', 'numAlt': 1, 'dp': 8, 'gq': 14, 'ab': 0.875, } }, diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index e2553f4dd7..ead298ea18 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -402,7 +402,7 @@ {k: v for k, v in g.items() if k != 'individualGuid'} for g in VARIANT1['genotypes'].values() ], key=lambda x: x['sampleId'], reverse=True), 'F000011_11': [{ - 'sampleId': 'NA20885', 'sampleType': 'WGS', 'familyGuid': 'F000011_11', + 'sampleId': 'NA20885', 'sampleType': 'WES', 'familyGuid': 'F000011_11', 'numAlt': 2, 'dp': 6, 'gq': 16, 'ab': 1.0, }], } From 6fa48e585259aea6bc6c0dc8fe70bccbe8cdb9cb Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 7 Aug 2024 16:56:41 -0400 Subject: [PATCH 557/736] test --- seqr/views/apis/variant_search_api_tests.py | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 0cf449d4f0..3acd45aa89 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -809,7 +809,7 @@ def test_variant_lookup(self, mock_variant_lookup): 'I0_F0_1-10439-AC-A': {'ab': 0.0, 'dp': 60, 'gq': 20, 'numAlt': 0, 'sampleType': 'WES'}, 'I1_F0_1-10439-AC-A': {'ab': 0.0, 'dp': 24, 'gq': 0, 'numAlt': 0, 'sampleType': 'WES'}, 'I2_F0_1-10439-AC-A': {'ab': 0.5, 'dp': 10, 'gq': 99, 'numAlt': 1, 'sampleType': 'WES'}, - 'I0_F1_1-10439-AC-A': {'ab': 1.0, 'dp': 6, 'gq': 16, 'numAlt': 2, 'sampleType': 'WGS'}, + 'I0_F1_1-10439-AC-A': {'ab': 1.0, 'dp': 6, 'gq': 16, 'numAlt': 2, 'sampleType': 'WES'}, }, } del expected_variant['familyGenotypes'] From af42441d284eb1da68c762c73520cfc24908ada0 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 8 Aug 2024 11:44:27 -0400 Subject: [PATCH 558/736] os instead of hailtop exists --- hail_search/queries/mito.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 5be79c829b..08c1af4bd6 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -1,6 +1,7 @@ +import os + from aiohttp.web import HTTPNotFound import hail as hl -import hailtop.fs as hfs import logging from hail_search.constants import ABSENT_PATH_SORT_OFFSET, CLINVAR_KEY, CLINVAR_MITO_KEY, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_FILTER, \ @@ -328,7 +329,7 @@ def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): )[0] for project_guid, families in variant_projects.items(): - if hfs.exists(self._get_table_path(f'projects/WES/{project_guid}.ht', use_ssd_dir=True)): + if os.path.exists(self._get_table_path(f'projects/WES/{project_guid}.ht')): sample_type = 'WES' else: sample_type = 'WGS' From 30c6a813e605f9f53f327f2dac74295f6f63eb07 Mon Sep 17 
00:00:00 2001 From: Julia Klugherz Date: Thu, 8 Aug 2024 11:53:41 -0400 Subject: [PATCH 559/736] Update base.py --- hail_search/queries/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 5fa5d0fddd..5588bc64dd 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -399,7 +399,7 @@ def _merge_project_hts(project_hts, n_partitions, include_all_globals=False): def _filter_entries_table(self, ht, sample_data, inheritance_filter=None, quality_filter=None, **kwargs): ht = self._prefilter_entries_table(ht, **kwargs) - # Temporarily reset sample_data + # Temporarily reset sample_data until full blended eS/GS support is added for family_guid, samples_by_sample_type in sample_data.items(): if isinstance(list(samples_by_sample_type.values())[0], list): samples = [s for samples in samples_by_sample_type.values() for s in samples] From eaf3623808bfe3ebaf3d0ad133de0413e10889f3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 8 Aug 2024 12:15:35 -0400 Subject: [PATCH 560/736] super-interval prefiltering --- hail_search/queries/base.py | 12 +++++++++--- hail_search/queries/snv_indel_37.py | 3 ++- seqr/utils/search/hail_search_utils.py | 6 ++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index fb8565dcde..0308a58fdb 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -630,8 +630,10 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs): raw_intervals = intervals if self._should_add_chr_prefix(): + # TODO handle [] notation? 
intervals = [ - f'[chr{interval.replace("[", "")}' if interval.startswith('[') else f'chr{interval}' + #f'[chr{interval.replace("[", "")}' if interval.startswith('[') else f'chr{interval}' + [f'chr{interval[0]}', *interval[1:]] for interval in (intervals or []) ] @@ -640,10 +642,14 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs): intervals = (intervals or []) + [reference_genome.x_contigs[0]] if len(intervals) > MAX_GENE_INTERVALS and len(intervals) == len(gene_ids or []): - return [] + super_intervals = defaultdict(lambda: (1e9, 0)) + for chrom, start, end in intervals: + super_intervals[chrom] = (min(super_intervals[chrom][0], start), max(super_intervals[chrom][1], end)) + intervals = [(chrom, start, end) for chrom, (start, end) in super_intervals.items()] parsed_intervals = [ - hl.eval(hl.parse_locus_interval(interval, reference_genome=self.GENOME_VERSION, invalid_missing=True)) + #hl.eval(hl.parse_locus_interval(interval, reference_genome=self.GENOME_VERSION, invalid_missing=True)) + hl.eval(hl.locus_interval(*interval, reference_genome=self.GENOME_VERSION, invalid_missing=True)) for interval in intervals ] invalid_intervals = [raw_intervals[i] for i, interval in enumerate(parsed_intervals) if interval is None] diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 3fb3cc5562..1511ef26b4 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -75,7 +75,8 @@ class SnvIndelHailTableQuery37(MitoHailTableQuery): def _prefilter_entries_table(self, ht, *args, **kwargs): ht = super()._prefilter_entries_table(ht, *args, **kwargs) - if 'variant_ht' not in self._load_table_kwargs and not self._load_table_kwargs.get('_filter_intervals'): + if True: + #if 'variant_ht' not in self._load_table_kwargs and not self._load_table_kwargs.get('_filter_intervals'): af_ht = self._get_loaded_filter_ht( GNOMAD_GENOMES_FIELD, 'high_af_variants.ht', self._get_gnomad_af_prefilter, **kwargs) if 
af_ht: diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 8a5002e078..74d07754f7 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -192,7 +192,8 @@ def _parse_location_search(search): for gene in genes.values() ] parsed_intervals = [_format_interval(**interval) for interval in intervals or []] + [ - '{chrom}:{start}-{end}'.format(**gene) for gene in gene_coords] + # '{chrom}:{start}-{end}'.format(**gene) for gene in gene_coords] + [gene['chrom'], gene['start'], gene['end']] for gene in gene_coords] if Sample.DATASET_TYPE_MITO_CALLS in search['sample_data'] and not exclude_locations: chromosomes = {gene['chrom'] for gene in gene_coords + (intervals or [])} if 'M' not in chromosomes: @@ -214,7 +215,8 @@ def _format_interval(chrom=None, start=None, end=None, offset=None, **kwargs): offset_pos = int((end - start) * offset) start = max(start - offset_pos, MIN_POS) end = min(end + offset_pos, MAX_POS) - return f'{chrom}:{start}-{end}' + return (chrom, start, end) + #return f'{chrom}:{start}-{end}' def _validate_expected_families(results, expected_families): From a4fd275bc0c3d87708ec333d590b413178f1ee8e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 8 Aug 2024 16:56:15 -0400 Subject: [PATCH 561/736] clean up --- hail_search/queries/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 0308a58fdb..e4772ee7e4 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -648,7 +648,6 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs): intervals = [(chrom, start, end) for chrom, (start, end) in super_intervals.items()] parsed_intervals = [ - #hl.eval(hl.parse_locus_interval(interval, reference_genome=self.GENOME_VERSION, invalid_missing=True)) hl.eval(hl.locus_interval(*interval, reference_genome=self.GENOME_VERSION, invalid_missing=True)) for interval in intervals ] From 
a5b97215e30ecad23611c4a96b9d18e763651e8a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 8 Aug 2024 16:59:42 -0400 Subject: [PATCH 562/736] fix tests --- hail_search/test_utils.py | 2 +- seqr/utils/search/hail_search_utils_tests.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index dda9b2c502..9c612a0e54 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -944,7 +944,7 @@ LOCATION_SEARCH = { 'gene_ids': ['ENSG00000177000', 'ENSG00000097046'], - 'intervals': ['2:1234-5678', '7:1-11100', '1:11785723-11806455', '1:91500851-91525764'], + 'intervals': [['2', 1234, 5678], ['7', 1, 11100], ['1', 11785723, 11806455], ['1', 91500851, 91525764]], } EXCLUDE_LOCATION_SEARCH = {'intervals': LOCATION_SEARCH['intervals'], 'exclude_intervals': True} VARIANT_ID_SEARCH = {'variant_ids': [['1', 10439, 'AC', 'A'], ['1', 91511686, 'TCA', 'G']], 'rs_ids': []} diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 65b25977cc..1c2f3c58ec 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -182,13 +182,13 @@ def test_query_variants(self): self.search_model.search['locus'] = {'rawItems': 'M:10-100 '} query_variants(self.results_model, user=self.user) - self._test_expected_search_call(intervals=['M:10-100'], sample_data=EXPECTED_MITO_SAMPLE_DATA) + self._test_expected_search_call(intervals=[['M', 10, 100]], sample_data=EXPECTED_MITO_SAMPLE_DATA) self.search_model.search['locus']['rawItems'] += raw_locus query_variants(self.results_model, user=self.user) self._test_expected_search_call( gene_ids=LOCATION_SEARCH['gene_ids'], - intervals=['M:10-100'] + LOCATION_SEARCH['intervals'], + intervals=[['M', 10, 100]] + LOCATION_SEARCH['intervals'], sample_data={**MULTI_PROJECT_SAMPLE_DATA, **sv_sample_data, **EXPECTED_MITO_SAMPLE_DATA}, ) From c7bd0e029393fe4f49175c3079fe2a0b6060b5a6 
Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 8 Aug 2024 17:11:24 -0400 Subject: [PATCH 563/736] use af prefiter for large gene lists --- hail_search/queries/base.py | 2 +- hail_search/queries/snv_indel_37.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index e4772ee7e4..3eda0cab55 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -259,7 +259,7 @@ def _load_filtered_table(self, sample_data, intervals=None, annotations=None, an parsed_intervals = self._parse_intervals(intervals, **kwargs) parsed_annotations = self._parse_annotations(annotations, annotations_secondary, **kwargs) self.import_filtered_table( - *self._parse_sample_data(sample_data), parsed_intervals=parsed_intervals, parsed_annotations=parsed_annotations, **kwargs) + *self._parse_sample_data(sample_data), parsed_intervals=parsed_intervals, raw_intervals=intervals, parsed_annotations=parsed_annotations, **kwargs) @classmethod def _get_table_path(cls, path, use_ssd_dir=False): diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 1511ef26b4..ee863b0839 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -73,10 +73,11 @@ class SnvIndelHailTableQuery37(MitoHailTableQuery): ('is_gt_10_percent', 0.1), ]) - def _prefilter_entries_table(self, ht, *args, **kwargs): + def _prefilter_entries_table(self, ht, *args, raw_intervals=None, **kwargs): ht = super()._prefilter_entries_table(ht, *args, **kwargs) - if True: - #if 'variant_ht' not in self._load_table_kwargs and not self._load_table_kwargs.get('_filter_intervals'): + load_table_intervals = self._load_table_kwargs.get('_intervals') or [] + no_interval_prefilter = not load_table_intervals or len(raw_intervals) > len(load_table_intervals) + if 'variant_ht' not in self._load_table_kwargs and no_interval_prefilter: af_ht = self._get_loaded_filter_ht( 
GNOMAD_GENOMES_FIELD, 'high_af_variants.ht', self._get_gnomad_af_prefilter, **kwargs) if af_ht: From a87104355db2eaef9c3d70503e62804f85d3b606 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 9 Aug 2024 11:03:06 -0400 Subject: [PATCH 564/736] bump changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e730f2820d..57532e5186 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## dev +## 8/9/24 +* Update directory structure for search backend + ## 8/2/24 * Adds index_file_path to IGV Sample model (REQUIRES DB MIGRATION) From 4393b3f72245db99a2c165347e4a86da39d90675 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 9 Aug 2024 11:33:51 -0400 Subject: [PATCH 565/736] fix unit tests --- hail_search/queries/base.py | 22 ++++++++++------------ hail_search/queries/snv_indel_37.py | 2 +- hail_search/test_search.py | 10 +++++----- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 3eda0cab55..496f1400e1 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -630,16 +630,7 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs): raw_intervals = intervals if self._should_add_chr_prefix(): - # TODO handle [] notation? 
- intervals = [ - #f'[chr{interval.replace("[", "")}' if interval.startswith('[') else f'chr{interval}' - [f'chr{interval[0]}', *interval[1:]] - for interval in (intervals or []) - ] - - if is_x_linked: - reference_genome = hl.get_reference(self.GENOME_VERSION) - intervals = (intervals or []) + [reference_genome.x_contigs[0]] + intervals = [[f'chr{interval[0]}', *interval[1:]] for interval in (intervals or [])] if len(intervals) > MAX_GENE_INTERVALS and len(intervals) == len(gene_ids or []): super_intervals = defaultdict(lambda: (1e9, 0)) @@ -649,11 +640,18 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs): parsed_intervals = [ hl.eval(hl.locus_interval(*interval, reference_genome=self.GENOME_VERSION, invalid_missing=True)) - for interval in intervals + for interval in (intervals or []) ] invalid_intervals = [raw_intervals[i] for i, interval in enumerate(parsed_intervals) if interval is None] if invalid_intervals: - raise HTTPBadRequest(reason=f'Invalid intervals: {", ".join(invalid_intervals)}') + error_interval = ', '.join([f'{chrom}:{start}-{end}' for chrom, start, end in invalid_intervals]) + raise HTTPBadRequest(reason=f'Invalid intervals: {error_interval}') + + if is_x_linked: + reference_genome = hl.get_reference(self.GENOME_VERSION) + parsed_intervals.append( + hl.eval(hl.parse_locus_interval(reference_genome.x_contigs[0], reference_genome=self.GENOME_VERSION)) + ) return parsed_intervals diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index ee863b0839..3addc9ded2 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -76,7 +76,7 @@ class SnvIndelHailTableQuery37(MitoHailTableQuery): def _prefilter_entries_table(self, ht, *args, raw_intervals=None, **kwargs): ht = super()._prefilter_entries_table(ht, *args, **kwargs) load_table_intervals = self._load_table_kwargs.get('_intervals') or [] - no_interval_prefilter = not load_table_intervals or len(raw_intervals) > 
len(load_table_intervals) + no_interval_prefilter = not load_table_intervals or len(raw_intervals or []) > len(load_table_intervals) if 'variant_ht' not in self._load_table_kwargs and no_interval_prefilter: af_ht = self._get_loaded_filter_ht( GNOMAD_GENOMES_FIELD, 'high_af_variants.ht', self._get_gnomad_af_prefilter, **kwargs) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index d42883b674..3b8d37153c 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -602,9 +602,9 @@ async def test_location_search(self): ) await self._assert_expected_search( - [GRCH37_VARIANT], intervals=['7:143268894-143271480'], genome_version='GRCh37', sample_data=FAMILY_2_VARIANT_SAMPLE_DATA) + [GRCH37_VARIANT], intervals=[['7', 143268894, 143271480]], genome_version='GRCh37', sample_data=FAMILY_2_VARIANT_SAMPLE_DATA) - sv_intervals = ['1:9310023-9380264', '17:38717636-38724781'] + sv_intervals = [['1', 9310023, 9380264], ['17', 38717636, 38724781]] await self._assert_expected_search( [GCNV_VARIANT3, GCNV_VARIANT4], intervals=sv_intervals, gene_ids=['ENSG00000275023'], omit_sample_type='SNV_INDEL', ) @@ -651,7 +651,7 @@ async def test_location_search(self): ) # For gene search, return SVs annotated in gene even if they fall outside the gene interval - nearest_tss_gene_intervals = ['1:9292894-9369532'] + nearest_tss_gene_intervals = [['1', 9292894, 9369532]] await self._assert_expected_search( [SV_VARIANT1], sample_data=SV_WGS_SAMPLE_DATA, intervals=nearest_tss_gene_intervals, ) @@ -1074,12 +1074,12 @@ async def test_search_errors(self): self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675_1, NA19678') search_body = get_hail_search_body( - intervals=LOCATION_SEARCH['intervals'] + ['1:1-99999999999'], omit_sample_type='SV_WES', + intervals=LOCATION_SEARCH['intervals'] + [['1', 1, 999999999]], omit_sample_type='SV_WES', ) async with self.client.request('POST', '/search', json=search_body) as resp: 
self.assertEqual(resp.status, 400) reason = resp.reason - self.assertEqual(reason, 'Invalid intervals: 1:1-99999999999') + self.assertEqual(reason, 'Invalid intervals: 1:1-999999999') async def test_sort(self): await self._assert_expected_search( From 7d8a8c353d109b3a398fc085395643b01302779b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 9 Aug 2024 11:42:40 -0400 Subject: [PATCH 566/736] clean up --- seqr/utils/search/hail_search_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index ae0be19f77..774e21ee9b 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -192,7 +192,6 @@ def _parse_location_search(search): for gene in genes.values() ] parsed_intervals = [_format_interval(**interval) for interval in intervals or []] + [ - # '{chrom}:{start}-{end}'.format(**gene) for gene in gene_coords] [gene['chrom'], gene['start'], gene['end']] for gene in gene_coords] if Sample.DATASET_TYPE_MITO_CALLS in search['sample_data'] and not exclude_locations: chromosomes = {gene['chrom'] for gene in gene_coords + (intervals or [])} @@ -215,8 +214,7 @@ def _format_interval(chrom=None, start=None, end=None, offset=None, **kwargs): offset_pos = int((end - start) * offset) start = max(start - offset_pos, MIN_POS) end = min(end + offset_pos, MAX_POS) - return (chrom, start, end) - #return f'{chrom}:{start}-{end}' + return chrom, start, end def _validate_expected_families(results, expected_families): From 509d1d6eee83abdd603991794131b7abc09b72cf Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 9 Aug 2024 12:20:38 -0400 Subject: [PATCH 567/736] cluster intervals --- hail_search/queries/base.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index dee34af8e5..9fc709e45a 100644 --- a/hail_search/queries/base.py +++ 
b/hail_search/queries/base.py @@ -647,10 +647,18 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs): intervals = [[f'chr{interval[0]}', *interval[1:]] for interval in (intervals or [])] if len(intervals) > MAX_GENE_INTERVALS and len(intervals) == len(gene_ids or []): - super_intervals = defaultdict(lambda: (1e9, 0)) - for chrom, start, end in intervals: - super_intervals[chrom] = (min(super_intervals[chrom][0], start), max(super_intervals[chrom][1], end)) - intervals = [(chrom, start, end) for chrom, (start, end) in super_intervals.items()] + intervals = sorted(intervals) + distance = 100000 + while len(intervals) > MAX_GENE_INTERVALS: + merged_intervals = [intervals[0]] + for chrom, start, end in intervals[1:]: + prev_chrom, prev_start, prev_end = merged_intervals[-1] + if chrom == prev_chrom and start - prev_end < distance: + merged_intervals[-1] = (chrom, prev_start, max(prev_end, end)) + else: + merged_intervals.append((chrom, start, end)) + intervals = merged_intervals + distance += 100000 parsed_intervals = [ hl.eval(hl.locus_interval(*interval, reference_genome=self.GENOME_VERSION, invalid_missing=True)) From 3931445d338ca165148f0267ee08da5facf77297 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 9 Aug 2024 14:13:47 -0400 Subject: [PATCH 568/736] test large gene list search --- .github/workflows/hail-search-unit-tests.yaml | 1 + hail_search/queries/base.py | 2 +- hail_search/test_search.py | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/hail-search-unit-tests.yaml b/.github/workflows/hail-search-unit-tests.yaml index 016e4e4382..eab73e2ebe 100644 --- a/.github/workflows/hail-search-unit-tests.yaml +++ b/.github/workflows/hail-search-unit-tests.yaml @@ -29,6 +29,7 @@ jobs: run: | export DATASETS_DIR=./hail_search/fixtures export ONT_ENABLED=true + export MAX_GENE_INTERVALS=3 export MACHINE_MEM=24 export JAVA_OPTS_XSS=16M coverage run --source="./hail_search" 
--omit="./hail_search/__main__.py","./hail_search/test_utils.py" -m pytest hail_search/ diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index dee34af8e5..63f7f62782 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -14,7 +14,7 @@ # Number of filtered genes at which pre-filtering a table by gene-intervals does not improve performance # Estimated based on behavior for several representative gene lists -MAX_GENE_INTERVALS = 100 +MAX_GENE_INTERVALS = int(os.environ.get('MAX_GENE_INTERVALS', 100)) # Optimal number of entry table partitions, balancing parallelization with partition overhead # Experimentally determined based on compound het search performance: diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 5293935e6e..6e9cc1ab90 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -601,6 +601,12 @@ async def test_location_search(self): [MULTI_FAMILY_VARIANT, VARIANT4], omit_data_type='SV_WES', **LOCATION_SEARCH, ) + # Test "large" gene list search + await self._assert_expected_search( + [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_data_type='SV_WES', intervals=LOCATION_SEARCH['intervals'], + gene_ids=LOCATION_SEARCH['gene_ids'] + ['ENSG00000277258', 'ENSG00000275023'], + ) + await self._assert_expected_search( [GRCH37_VARIANT], intervals=[['7', 143268894, 143271480]], genome_version='GRCh37', sample_data=FAMILY_2_VARIANT_SAMPLE_DATA) From 94e6a8e37b3bf830a753ce96e8c027dbca921593 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 9 Aug 2024 18:25:56 +0000 Subject: [PATCH 569/736] Bump aiohttp from 3.9.4 to 3.10.2 in /hail_search Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.9.4 to 3.10.2. 
- [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.4...v3.10.2) --- updated-dependencies: - dependency-name: aiohttp dependency-type: indirect ... Signed-off-by: dependabot[bot] --- hail_search/requirements-test.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hail_search/requirements-test.txt b/hail_search/requirements-test.txt index 508c8f0d05..413d43db69 100644 --- a/hail_search/requirements-test.txt +++ b/hail_search/requirements-test.txt @@ -4,7 +4,9 @@ # # pip-compile hail_search/requirements-test.in # -aiohttp==3.9.4 +aiohappyeyeballs==2.3.5 + # via aiohttp +aiohttp==3.10.2 # via pytest-aiohttp aiosignal==1.3.1 # via aiohttp From 77abd6a08a25ce196b26207012dcdad49567ba94 Mon Sep 17 00:00:00 2001 From: Cas Simons Date: Sun, 11 Aug 2024 21:03:43 +1200 Subject: [PATCH 570/736] Update talos import --- seqr/views/apis/summary_data_api.py | 14 +++---- seqr/views/utils/orm_to_json_utils.py | 2 +- .../components/ExternalAnalysis.jsx | 4 +- .../SummaryData/components/SavedVariants.jsx | 4 +- .../panel/variants/FamilyVariantTags.jsx | 20 ++++++--- ui/shared/utils/constants.js | 41 ++++++++++++------- 6 files changed, 52 insertions(+), 33 deletions(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index dbf944bd28..1dc09c7219 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -568,7 +568,7 @@ def _load_aip_full_report_data(data: dict, user: User): Version of _load_aip_data that ingests a full AIP report rather than the cut down "seqr" format. - - Adds both the AIP-permissive and AIP-restrictive tags + - Adds both the Talos-permissive and Talos-restrictive tags depending on the presence of HPO matches in the variant. - Adds the First Seen metadata field to the tags. 
@@ -629,11 +629,11 @@ def _load_aip_full_report_data(data: dict, user: User): saved_variant_map.update(new_variants_from_search) # Add the aip_permissive tag to all variants - aip_tag_type = VariantTagType.objects.get(name='AIP-permissive', project=None) + aip_tag_type = VariantTagType.objects.get(name='Talos-permissive', project=None) num_new, num_updated = _cpg_add_aip_tags_to_saved_variants(aip_tag_type, saved_variant_map, family_variant_data, category_map, user, restrictive=False) # Add the aip_restrictive tag to qualifying variants - aip_restrictive_tag_type = VariantTagType.objects.get(name='AIP-restrictive', project=None) + aip_restrictive_tag_type = VariantTagType.objects.get(name='Talos-restrictive', project=None) num_new_restrictive, num_updated_restrictive = _cpg_add_aip_tags_to_saved_variants(aip_restrictive_tag_type, saved_variant_map, family_variant_data, category_map, user, restrictive=True) summary_message = f'Loaded {num_new} new ({num_new_restrictive} restrictive) and {num_updated} updated ({num_updated_restrictive} restrictive) AIP tags for {len(family_id_map)} families' @@ -672,14 +672,10 @@ def _cpg_add_aip_tags_to_saved_variants(aip_tag_type, saved_variant_map, family_ # Copy selected metadata fields from the AIP results to the tag metadata. metadata = {} - for k in ['flags', 'independent', 'labels', 'panels', 'phenotypes', 'reasons', 'support_vars']: + for k in ['flags', 'independent', 'labels', 'panels', 'phenotypes', 'reasons', 'support_vars', 'phenotype_labels', + 'date_of_phenotype_match', 'evidence_last_updated', 'first_tagged']: metadata[k] = variant_result[k] - if restrictive: - metadata['first_tagged'] = variant_result.get('first_seen_restrictive', variant_result['first_seen']) - else: - metadata['first_tagged'] = variant_result['first_seen'] - # Add the categories using the date of ingest as the date. 
metadata['categories'] = {category: {'name': category_map[category], 'date': today} for category in variant_result['categories']} diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 60c7e65336..3e684ec9c1 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -437,7 +437,7 @@ def _format_functional_tags(tags): return tags -AIP_TAG_TYPES = ['AIP', 'AIP-permissive', 'AIP-restrictive'] +AIP_TAG_TYPES = ['AIP', 'Talos-permissive', 'Talos-restrictive'] GREGOR_FINDING_TAG_TYPE = 'GREGoR Finding' STRUCTURED_METADATA_TAG_TYPES = AIP_TAG_TYPES + [GREGOR_FINDING_TAG_TYPE,] def _format_variant_tags(tags): diff --git a/ui/pages/SummaryData/components/ExternalAnalysis.jsx b/ui/pages/SummaryData/components/ExternalAnalysis.jsx index f3f511534e..ab7eb5cc4e 100644 --- a/ui/pages/SummaryData/components/ExternalAnalysis.jsx +++ b/ui/pages/SummaryData/components/ExternalAnalysis.jsx @@ -17,7 +17,7 @@ const UPLOAD_FIELDS = [ component: Select, options: [ ...FAMILY_ANALYSED_BY_DATA_TYPES.map(([value, text]) => ({ value, text })), - { value: 'AIP' }, { value: 'CPG: Full AIP report' }, + { value: 'AIP' }, { value: 'CPG: Full Talos report' }, ], validate: validators.required, }, @@ -29,7 +29,7 @@ const UPLOAD_FIELDS = [ Drag-drop or click here to upload analysed families

- File should include a "Project" and a "Family" column OR be valid AIP JSON + File should include a "Project" and a "Family" column OR be valid AIP/Talos JSON
), validate: validateUploadedFile, diff --git a/ui/pages/SummaryData/components/SavedVariants.jsx b/ui/pages/SummaryData/components/SavedVariants.jsx index b9a97ac936..63ffadaf7d 100644 --- a/ui/pages/SummaryData/components/SavedVariants.jsx +++ b/ui/pages/SummaryData/components/SavedVariants.jsx @@ -51,8 +51,8 @@ const TAG_OPTIONS = [ 'Confident AR one hit', 'Analyst high priority', 'AIP', - 'AIP-permissive', - 'AIP-restrictive', + 'Talos-permissive', + 'Talos-restrictive', 'seqr MME (old)', 'Submit to Clinvar', 'Share with KOMP', diff --git a/ui/shared/components/panel/variants/FamilyVariantTags.jsx b/ui/shared/components/panel/variants/FamilyVariantTags.jsx index 75a6f6d20d..9cb46ddde6 100644 --- a/ui/shared/components/panel/variants/FamilyVariantTags.jsx +++ b/ui/shared/components/panel/variants/FamilyVariantTags.jsx @@ -91,7 +91,7 @@ const aipHpoList = (panels) => { return (
- Phenotype Matches: + Gene Panel Matches: {Object.entries(panels).map(([matchClass, matches]) => { if (matches.matches === 0) { return null @@ -105,9 +105,6 @@ const aipHpoList = (panels) => { case 'forced': label = 'Cohort Panel' break - case 'gene_level': - label = 'Gene Specific Match' - break default: label = '' } @@ -132,7 +129,7 @@ export const taggedByPopup = (tag, title) => (trigger, hideMetadata) => ( position="top right" size="tiny" trigger={trigger} - header={title || (tag.aipMetadata ? 'AIP results' : 'Tagged by')} + header={title || (tag.aipMetadata ? 'Talos results' : 'Tagged by')} hoverable flowing content={ @@ -144,6 +141,16 @@ export const taggedByPopup = (tag, title) => (trigger, hideMetadata) => ( {tag.aipMetadata.first_tagged}
+
+ Evidence Updated: + + {tag.aipMetadata.evidence_last_updated} +
+
+ Phenotype match first identified: + + {tag.aipMetadata.date_of_phenotype_match} +
Categories: {Object.entries(tag.aipMetadata.categories).map(aipCategoryRow)} @@ -166,6 +173,9 @@ export const taggedByPopup = (tag, title) => (trigger, hideMetadata) => ( {tag.aipMetadata.labels && ( aipHpoList(tag.aipMetadata.panels) )} + {tag.aipMetadata.labels && ( + aipMetaList('gene-hpo', 'Matched Gene Phenotypes', tag.aipMetadata.phenotype_labels) + )}
) : `${tag.createdBy || 'unknown user'}${tag.lastModifiedDate ? ` on ${new Date(tag.lastModifiedDate).toLocaleDateString()}` : ''}`} {tag.metadata && !hideMetadata && ( diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 707edbdaf7..3b09b0c426 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1064,8 +1064,9 @@ const SORT_BY_EIGEN = 'EIGEN' const SORT_BY_MPC = 'MPC' const SORT_BY_PRIMATE_AI = 'PRIMATE_AI' const SORT_BY_TAGGED_DATE = 'TAGGED_DATE' -const SORT_BY_AIP_DATE = 'AIP_CATEGORY_DATE' -const SORT_BY_AIP_FIRST_TAGGED = 'AIP_FIRST_TAGGED' +const SORT_BY_TALOS_DATE = 'TALOS_CATEGORY_DATE' +const SORT_BY_TALOS_FIRST_TAGGED = 'TALOS_FIRST_TAGGED' +const SORT_BY_TALOS_PHENO_DATE = 'TALOS_PHENO_DATE' const SORT_BY_SIZE = 'SIZE' export const getPermissionedHgmdClass = (variant, user, familiesByGuid, projectByGuid) => ( @@ -1222,35 +1223,47 @@ const VARIANT_SORT_OPTONS = [ ), }, { - value: SORT_BY_AIP_FIRST_TAGGED, - text: 'AIP: Last Tagged', + value: SORT_BY_TALOS_FIRST_TAGGED, + text: 'TALOS: Date first Tagged', comparator: (a, b, genesById, tagsByGuid) => { - const getAipFirstTaggedDate = (variant) => { + const getTalosFirstTaggedDate = (variant) => { const aipMetadata = variant.tagGuids.map(tagGuid => tagsByGuid[tagGuid]?.aipMetadata) const dates = (aipMetadata || []).map(metadata => metadata?.first_tagged || '') return dates.filter(date => date !== null).sort().reverse()[0] || '' } - return getAipFirstTaggedDate(b).localeCompare(getAipFirstTaggedDate(a)) + return getTalosFirstTaggedDate(b).localeCompare(getTalosFirstTaggedDate(a)) }, }, { - value: SORT_BY_AIP_DATE, - text: 'AIP: Evidence Last Updated', + value: SORT_BY_TALOS_DATE, + text: 'TALOS: Date Evidence Updated', comparator: (a, b, genesById, tagsByGuid) => { - const getLatestAipCatagoryDate = (variant) => { + const getLatestTalosCatagoryDate = (variant) => { const aipMetadata = variant.tagGuids.map(tagGuid => tagsByGuid[tagGuid]?.aipMetadata) - 
const dates = (aipMetadata || []).map(metadata => Object.values(metadata?.categories || {}) - .map(data => data.date)).flat() + const dates = (aipMetadata || []).map(metadata => metadata?.evidence_last_updated || '') return dates.filter(date => date !== null).sort().reverse()[0] || '' } - return getLatestAipCatagoryDate(b).localeCompare(getLatestAipCatagoryDate(a)) + return getLatestTalosCatagoryDate(b).localeCompare(getLatestTalosCatagoryDate(a)) + }, + }, + { + value: SORT_BY_TALOS_PHENO_DATE, + text: 'TALOS: Date Phenotype Match Fist Found', + comparator: (a, b, genesById, tagsByGuid) => { + const getLatestTalosPhenoDate = (variant) => { + const aipMetadata = variant.tagGuids.map(tagGuid => tagsByGuid[tagGuid]?.aipMetadata) + const dates = (aipMetadata || []).map(metadata => metadata?.date_of_phenotype_match || '') + return dates.filter(date => date !== null).sort().reverse()[0] || '' + } + + return getLatestTalosPhenoDate(b).localeCompare(getLatestTalosPhenoDate(a)) }, }, ] -// CPG: AIP related sorting must be excluded from VARIANT_SEARCH_SORT_OPTONS -const VARIANT_SEARCH_SORT_OPTONS = VARIANT_SORT_OPTONS.slice(1, VARIANT_SORT_OPTONS.length - 3) +// CPG: TALOS related sorting must be excluded from VARIANT_SEARCH_SORT_OPTONS +const VARIANT_SEARCH_SORT_OPTONS = VARIANT_SORT_OPTONS.slice(1, VARIANT_SORT_OPTONS.length - 4) export const VARIANT_SORT_LOOKUP = VARIANT_SORT_OPTONS.reduce( (acc, opt) => ({ From b91fe4af6fac48c6d3d8146d205d84a49fa8a7e3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Aug 2024 12:35:51 -0400 Subject: [PATCH 571/736] contact placeholder --- seqr/views/apis/variant_search_api.py | 1 + ui/pages/SummaryData/components/VariantLookup.jsx | 4 +++- ui/shared/components/panel/variants/VariantIndividuals.jsx | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/variant_search_api.py b/seqr/views/apis/variant_search_api.py index dc9bd67ca5..0d9cdac9c2 100644 --- a/seqr/views/apis/variant_search_api.py +++
b/seqr/views/apis/variant_search_api.py @@ -585,6 +585,7 @@ def _update_lookup_variant(variant, response): (i.pop('family__guid'), i.pop('individual_id')): i for i in Individual.objects.filter(family__guid__in=no_access_families).values( 'family__guid', 'individual_id', 'affected', 'sex', 'features', + vlmContactEmail=F('family__project__vlm_contact_email'), ) } add_individual_hpo_details(individual_summary_map.values()) diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index dae7b6595f..fa929579e0 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -34,6 +34,8 @@ const FIELDS = [ { required: true, ...GENOME_VERSION_FIELD }, ] +const individualDetail = individual => !individual.projectGuid && individual.vlmContactEmail + const LookupFamily = ({ familyGuid, variant, reads, showReads }) => ( @@ -41,7 +43,7 @@ const LookupFamily = ({ familyGuid, variant, reads, showReads }) => ( - + {showReads} {reads} diff --git a/ui/shared/components/panel/variants/VariantIndividuals.jsx b/ui/shared/components/panel/variants/VariantIndividuals.jsx index 1611ff8d90..a495d93e15 100644 --- a/ui/shared/components/panel/variants/VariantIndividuals.jsx +++ b/ui/shared/components/panel/variants/VariantIndividuals.jsx @@ -407,8 +407,9 @@ IndividualDetailField.propTypes = { field: PropTypes.string, } -const BaseVariantIndividuals = React.memo(({ variant, individuals, isCompoundHet, genesById }) => ( +const BaseVariantIndividuals = React.memo(({ variant, individuals, isCompoundHet, genesById, individualDetail }) => ( + {individualDetail && {individualDetail((individuals || [])[0])}} {(individuals || []).map(individual => ( ({ From 3ecb3a96a609f7a352114d153c42a126facd2a7c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Aug 2024 13:27:59 -0400 Subject: [PATCH 572/736] contact button --- .../SummaryData/components/VariantLookup.jsx | 31 +++++++++++++++++-- 
.../panel/variants/VariantIndividuals.jsx | 3 +- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index fa929579e0..41f6756280 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -6,6 +6,7 @@ import { Grid, Header } from 'semantic-ui-react' import { RECEIVE_DATA } from 'redux/utils/reducerUtils' import { QueryParamsEditor } from 'shared/components/QueryParamEditor' import StateDataLoader from 'shared/components/StateDataLoader' +import UpdateButton from 'shared/components/buttons/UpdateButton' import FormWrapper from 'shared/components/form/FormWrapper' import { helpLabel } from 'shared/components/form/FormHelpers' import { BaseSemanticInput } from 'shared/components/form/Inputs' @@ -13,7 +14,7 @@ import FamilyReads from 'shared/components/panel/family/FamilyReads' import FamilyVariantTags from 'shared/components/panel/variants/FamilyVariantTags' import Variants, { Variant, StyledVariantRow } from 'shared/components/panel/variants/Variants' import { FamilyVariantIndividuals } from 'shared/components/panel/variants/VariantIndividuals' -import { GENOME_VERSION_FIELD } from 'shared/utils/constants' +import { getVariantMainGeneId, GENOME_VERSION_FIELD } from 'shared/utils/constants' const FIELDS = [ { @@ -34,7 +35,33 @@ const FIELDS = [ { required: true, ...GENOME_VERSION_FIELD }, ] -const individualDetail = individual => !individual.projectGuid && individual.vlmContactEmail +const CONTACT_FIELDS = [ + { name: 'subject', label: 'Subject:' }, + { name: 'body', component: BaseSemanticInput, inputType: 'TextArea', rows: 12 }, +] + +const defaultEmail = (variant, to, genesById) => ({ + to, + subject: `${genesById[getVariantMainGeneId(variant)]?.geneSymbol || variant.variantId} variant match in seqr`, + // TODO c. and p. 
, name + body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring a variant [c. and p.] in ${genesById[getVariantMainGeneId(variant)]?.geneSymbol}.\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n\n[name]`, +}) + +const individualDetail = ({ projectGuid, familyGuid, vlmContactEmail }, variant, genesById) => !projectGuid && ( + +) const LookupFamily = ({ familyGuid, variant, reads, showReads }) => ( diff --git a/ui/shared/components/panel/variants/VariantIndividuals.jsx b/ui/shared/components/panel/variants/VariantIndividuals.jsx index a495d93e15..832bcce3e9 100644 --- a/ui/shared/components/panel/variants/VariantIndividuals.jsx +++ b/ui/shared/components/panel/variants/VariantIndividuals.jsx @@ -409,7 +409,8 @@ IndividualDetailField.propTypes = { const BaseVariantIndividuals = React.memo(({ variant, individuals, isCompoundHet, genesById, individualDetail }) => ( - {individualDetail && {individualDetail((individuals || [])[0])}} + {individualDetail && + {individualDetail((individuals || [])[0], variant, genesById)}} {(individuals || []).map(individual => ( Date: Mon, 12 Aug 2024 13:54:30 -0400 Subject: [PATCH 573/736] shared send email button --- ui/pages/Project/components/Matchmaker.jsx | 45 +++-------------- .../SummaryData/components/VariantLookup.jsx | 22 +++----- .../components/buttons/SendEmailButton.jsx | 50 +++++++++++++++++++ 3 files changed, 64 insertions(+), 53 deletions(-) create mode 100644 ui/shared/components/buttons/SendEmailButton.jsx diff --git a/ui/pages/Project/components/Matchmaker.jsx b/ui/pages/Project/components/Matchmaker.jsx index 7c461c470b..5aad99db66 100644 --- a/ui/pages/Project/components/Matchmaker.jsx +++ b/ui/pages/Project/components/Matchmaker.jsx @@ -12,6 +12,7 @@ import { } from 'redux/selectors' import DeleteButton from 'shared/components/buttons/DeleteButton' import UpdateButton from 'shared/components/buttons/UpdateButton' +import 
SendEmailButton from 'shared/components/buttons/SendEmailButton' import { BooleanCheckbox, BaseSemanticInput } from 'shared/components/form/Inputs' import { SubmissionGeneVariants, Phenotypes } from 'shared/components/panel/MatchmakerPanel' import BaseFieldView from 'shared/components/panel/view-fields/BaseFieldView' @@ -127,7 +128,6 @@ const mapPhenotypeStateToProps = (state, ownProps) => ({ const EditPhenotypesTable = connect(mapPhenotypeStateToProps)(BaseEditPhenotypesTable) -const CONTACT_URL_REGEX = /^[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}(,\s*[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{1,4})*$/i const SUBMISSION_EDIT_FIELDS = [ { ...MATCHMAKER_CONTACT_NAME_FIELD, name: 'contactName' }, { ...MATCHMAKER_CONTACT_URL_FIELD, name: 'contactHref' }, @@ -151,49 +151,20 @@ const SUBMISSION_EDIT_FIELDS = [ }, ] -const CONTACT_FIELDS = [ - { - name: 'to', - label: 'Send To:', - validate: val => (CONTACT_URL_REGEX.test(val) ? undefined : 'Invalid Contact Email'), - }, - { name: 'subject', label: 'Subject:' }, - { name: 'body', component: BaseSemanticInput, inputType: 'TextArea', rows: 12 }, -] - -const BaseContactHostButton = React.memo(({ defaultContactEmail, onSubmit, canSend }) => ( - // when submitOnChange is true, no submit button is shown - -)) - -BaseContactHostButton.propTypes = { - defaultContactEmail: PropTypes.object, - onSubmit: PropTypes.func, - canSend: PropTypes.bool, -} - const mapContactButtonStateToProps = (state, ownProps) => ({ - defaultContactEmail: getMmeDefaultContactEmail(state, ownProps), - canSend: getCurrentProject(state).isAnalystProject, + defaultEmail: getMmeDefaultContactEmail(state, ownProps), + draftOnly: !getCurrentProject(state).isAnalystProject, + editRecipient: true, + buttonText: 'Contact Host', + idField: 'patientId', + modalTitleDetail: patientId => ` for Patient ${patientId}`, }) const mapContactDispatchToProps = { onSubmit: sendMmeContactEmail, } -const ContactHostButton = connect(mapContactButtonStateToProps, 
mapContactDispatchToProps)(BaseContactHostButton) +const ContactHostButton = connect(mapContactButtonStateToProps, mapContactDispatchToProps)(SendEmailButton) const contactedLabel = (val) => { if (val.hostContacted) { diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index 41f6756280..be8ea95cf9 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -6,7 +6,7 @@ import { Grid, Header } from 'semantic-ui-react' import { RECEIVE_DATA } from 'redux/utils/reducerUtils' import { QueryParamsEditor } from 'shared/components/QueryParamEditor' import StateDataLoader from 'shared/components/StateDataLoader' -import UpdateButton from 'shared/components/buttons/UpdateButton' +import SendEmailButton from 'shared/components/buttons/SendEmailButton' import FormWrapper from 'shared/components/form/FormWrapper' import { helpLabel } from 'shared/components/form/FormHelpers' import { BaseSemanticInput } from 'shared/components/form/Inputs' @@ -35,31 +35,21 @@ const FIELDS = [ { required: true, ...GENOME_VERSION_FIELD }, ] -const CONTACT_FIELDS = [ - { name: 'subject', label: 'Subject:' }, - { name: 'body', component: BaseSemanticInput, inputType: 'TextArea', rows: 12 }, -] - -const defaultEmail = (variant, to, genesById) => ({ +const defaultEmail = (variant, to, familyGuid, genesById) => ({ to, + familyGuid, subject: `${genesById[getVariantMainGeneId(variant)]?.geneSymbol || variant.variantId} variant match in seqr`, // TODO c. and p. , name body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring a variant [c. and p.] 
in ${genesById[getVariantMainGeneId(variant)]?.geneSymbol}.\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n\n[name]`, }) const individualDetail = ({ projectGuid, familyGuid, vlmContactEmail }, variant, genesById) => !projectGuid && ( - ) diff --git a/ui/shared/components/buttons/SendEmailButton.jsx b/ui/shared/components/buttons/SendEmailButton.jsx new file mode 100644 index 0000000000..5897ec8f3e --- /dev/null +++ b/ui/shared/components/buttons/SendEmailButton.jsx @@ -0,0 +1,50 @@ +import React from 'react' +import PropTypes from 'prop-types' +import UpdateButton from './UpdateButton' +import { BaseSemanticInput } from '../form/Inputs' + +const CONTACT_URL_REGEX = /^[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}(,\s*[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{1,4})*$/i + +const NO_RECIPIENT_CONTACT_FIELDS = [ + { name: 'subject', label: 'Subject:' }, + { name: 'body', component: BaseSemanticInput, inputType: 'TextArea', rows: 12 }, +] +const CONTACT_FIELDS = [ + { + name: 'to', + label: 'Send To:', + validate: val => (CONTACT_URL_REGEX.test(val) ? 
undefined : 'Invalid Contact Email'), + }, + ...NO_RECIPIENT_CONTACT_FIELDS, +] + +const SendEmailButton = React.memo(( + { defaultEmail, onSubmit, modalId, idField, draftOnly, editRecipient, modalTitleDetail, ...props }, +) => ( + // when submitOnChange is true, no submit button is shown + +)) + +SendEmailButton.propTypes = { + defaultEmail: PropTypes.object.isRequired, + onSubmit: PropTypes.func, + modalId: PropTypes.string, + idField: PropTypes.string, + draftOnly: PropTypes.bool, + editRecipient: PropTypes.bool, + modalTitleDetail: PropTypes.string, +} + +export default SendEmailButton From ce7e1f4f367a04c888f525e0f7e971f04adab10a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Aug 2024 15:55:19 -0400 Subject: [PATCH 574/736] selector for contact email --- .../SummaryData/components/VariantLookup.jsx | 38 +++++++++---------- ui/pages/SummaryData/selectors.js | 27 +++++++++++++ .../components/buttons/SendEmailButton.jsx | 4 +- .../panel/variants/VariantIndividuals.jsx | 5 +-- 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index be8ea95cf9..8038a871e6 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -14,7 +14,8 @@ import FamilyReads from 'shared/components/panel/family/FamilyReads' import FamilyVariantTags from 'shared/components/panel/variants/FamilyVariantTags' import Variants, { Variant, StyledVariantRow } from 'shared/components/panel/variants/Variants' import { FamilyVariantIndividuals } from 'shared/components/panel/variants/VariantIndividuals' -import { getVariantMainGeneId, GENOME_VERSION_FIELD } from 'shared/utils/constants' +import { GENOME_VERSION_FIELD } from 'shared/utils/constants' +import { geVlmDefaultContactEmailByFamily } from '../selectors' const FIELDS = [ { @@ -35,32 +36,31 @@ const FIELDS = [ { required: true, ...GENOME_VERSION_FIELD }, ] -const 
defaultEmail = (variant, to, familyGuid, genesById) => ({ - to, - familyGuid, - subject: `${genesById[getVariantMainGeneId(variant)]?.geneSymbol || variant.variantId} variant match in seqr`, - // TODO c. and p. , name - body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring a variant [c. and p.] in ${genesById[getVariantMainGeneId(variant)]?.geneSymbol}.\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n\n[name]`, -}) +const mapContactStateToProps = (state, ownProps) => { + const defaultEmail = geVlmDefaultContactEmailByFamily(state, ownProps)[ownProps.familyGuid] + const disabled = !defaultEmail?.to + return { + defaultEmail, + disabled, + buttonText: disabled ? 'Contact Opted Out' : null, + modalId: ownProps.familyGuid, + } +} -const individualDetail = ({ projectGuid, familyGuid, vlmContactEmail }, variant, genesById) => !projectGuid && ( - -) +const mapContactDispatchToProps = { + onSubmit: console.log, +} + +const ContactButton = connect(mapContactStateToProps, mapContactDispatchToProps)(SendEmailButton) const LookupFamily = ({ familyGuid, variant, reads, showReads }) => ( - + - + {showReads} {reads} diff --git a/ui/pages/SummaryData/selectors.js b/ui/pages/SummaryData/selectors.js index 9676010112..e6901b582f 100644 --- a/ui/pages/SummaryData/selectors.js +++ b/ui/pages/SummaryData/selectors.js @@ -1,3 +1,8 @@ +import { createSelector } from 'reselect' + +import { getSortedIndividualsByFamily, getGenesById, getUser } from 'redux/selectors' +import { getVariantMainGeneId } from 'shared/utils/constants' + export const getSuccessStoryLoading = state => state.successStoryLoading.isLoading export const getSuccessStoryLoadingError = state => state.successStoryLoading.errorMessage export const getSuccessStoryRows = state => state.successStoryRows @@ -6,3 +11,25 @@ export const getMmeLoadingError = state => state.mmeLoading.errorMessage export const getMmeMetrics = state => 
state.mmeMetrics export const getMmeSubmissions = state => state.mmeSubmissions export const getExternalAnalysisUploadStats = state => state.externalAnalysisUploadStats + +export const geVlmDefaultContactEmailByFamily = createSelector( + getSortedIndividualsByFamily, + getGenesById, + getUser, + (state, ownProps) => ownProps.variant, + (individualsByFamily, genesById, user, variant) => { + const gene = genesById[getVariantMainGeneId(variant)]?.geneSymbol + const defaultEmail = { + subject: `${gene || variant.variantId} variant match in seqr`, + // TODO c. and p. + body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring a variant [c. and p.] in ${gene}.\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}`, + } + return (variant.lookupFamilyGuids || []).reduce((acc, familyGuid) => { + const individual = individualsByFamily[familyGuid]?.[0] + if (!individual || individual.projectGuid) { + return acc + } + return { ...acc, [familyGuid]: { ...defaultEmail, to: individual.vlmContactEmail } } + }, {}) + }, +) diff --git a/ui/shared/components/buttons/SendEmailButton.jsx b/ui/shared/components/buttons/SendEmailButton.jsx index 5897ec8f3e..7b3cb9bdb9 100644 --- a/ui/shared/components/buttons/SendEmailButton.jsx +++ b/ui/shared/components/buttons/SendEmailButton.jsx @@ -20,7 +20,7 @@ const CONTACT_FIELDS = [ const SendEmailButton = React.memo(( { defaultEmail, onSubmit, modalId, idField, draftOnly, editRecipient, modalTitleDetail, ...props }, -) => ( +) => (defaultEmail ? 
( // when submitOnChange is true, no submit button is shown -)) +) : null)) SendEmailButton.propTypes = { defaultEmail: PropTypes.object.isRequired, diff --git a/ui/shared/components/panel/variants/VariantIndividuals.jsx b/ui/shared/components/panel/variants/VariantIndividuals.jsx index 832bcce3e9..1611ff8d90 100644 --- a/ui/shared/components/panel/variants/VariantIndividuals.jsx +++ b/ui/shared/components/panel/variants/VariantIndividuals.jsx @@ -407,10 +407,8 @@ IndividualDetailField.propTypes = { field: PropTypes.string, } -const BaseVariantIndividuals = React.memo(({ variant, individuals, isCompoundHet, genesById, individualDetail }) => ( +const BaseVariantIndividuals = React.memo(({ variant, individuals, isCompoundHet, genesById }) => ( - {individualDetail && - {individualDetail((individuals || [])[0], variant, genesById)}} {(individuals || []).map(individual => ( ({ From 29d5f3b47b84e84a9627283d802bb6e0ecc791c3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Aug 2024 16:49:27 -0400 Subject: [PATCH 575/736] shared variant summary code --- ui/pages/Project/selectors.js | 23 ++++------------------- ui/pages/SummaryData/selectors.js | 4 ++-- ui/shared/utils/constants.js | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js index 50aefbc1a8..c619ef3b3b 100644 --- a/ui/pages/Project/selectors.js +++ b/ui/pages/Project/selectors.js @@ -7,8 +7,7 @@ import { FAMILY_FIELD_FIRST_SAMPLE, FAMILY_FIELD_ANALYSED_BY, FAMILY_NOTES_FIELDS, - GENOME_VERSION_DISPLAY_LOOKUP, - getVariantMainTranscript, + getVariantSummary, INDIVIDUAL_EXPORT_DATA, INDIVIDUAL_HAS_DATA_FIELD, MME_TAG_NAME, @@ -603,23 +602,9 @@ export const getMmeDefaultContactEmail = createSelector( geneSymbol => geneSymbol && submittedGenes.includes(geneSymbol), ) - const submittedVariants = (submissionGeneVariants || []).map(({ variantGuid }) => { - const savedVariant = savedVariants[variantGuid] - const { alt, 
ref, chrom, pos, end, genomeVersion } = savedVariant - const genotype = (savedVariant.genotypes || {})[individualGuid] || {} - const mainTranscript = getVariantMainTranscript(savedVariant) - let consequence = `${(mainTranscript.majorConsequence || '').replace(/_variant/g, '').replace(/_/g, ' ')} variant` - let variantDetail = [(mainTranscript.hgvsc || '').split(':').pop(), (mainTranscript.hgvsp || '').split(':').pop()].filter(val => val).join('/') - const displayGenomeVersion = GENOME_VERSION_DISPLAY_LOOKUP[genomeVersion] || genomeVersion - let inheritance = genotype.numAlt === 1 ? 'heterozygous' : 'homozygous' - if (genotype.numAlt === -1) { - inheritance = 'copy number' - consequence = genotype.cn < 2 ? 'deletion' : 'duplication' - variantDetail = `CN=${genotype.cn}` - } - const position = ref ? `${pos} ${ref}>${alt}` : `${pos}-${end}` - return `a ${inheritance} ${consequence} ${chrom}:${position}${displayGenomeVersion ? ` (${displayGenomeVersion})` : ''}${variantDetail ? ` (${variantDetail})` : ''}` - }).join(', ') + const submittedVariants = (submissionGeneVariants || []).map(({ variantGuid }) => ( + getVariantSummary(savedVariants[variantGuid], individualGuid) + )).join(', ') const submittedPhenotypeList = (phenotypes || []).filter( ({ observed, label }) => observed === 'yes' && label, diff --git a/ui/pages/SummaryData/selectors.js b/ui/pages/SummaryData/selectors.js index e6901b582f..f714cfcda4 100644 --- a/ui/pages/SummaryData/selectors.js +++ b/ui/pages/SummaryData/selectors.js @@ -1,7 +1,7 @@ import { createSelector } from 'reselect' import { getSortedIndividualsByFamily, getGenesById, getUser } from 'redux/selectors' -import { getVariantMainGeneId } from 'shared/utils/constants' +import { getVariantMainGeneId, getVariantSummary } from 'shared/utils/constants' export const getSuccessStoryLoading = state => state.successStoryLoading.isLoading export const getSuccessStoryLoadingError = state => state.successStoryLoading.errorMessage @@ -22,7 +22,7 @@ export 
const geVlmDefaultContactEmailByFamily = createSelector( const defaultEmail = { subject: `${gene || variant.variantId} variant match in seqr`, // TODO c. and p. - body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring a variant [c. and p.] in ${gene}.\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}`, + body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring ${getVariantSummary(variant)} in ${gene || 'no genes'}.\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}`, } return (variant.lookupFamilyGuids || []).reduce((acc, familyGuid) => { const individual = individualsByFamily[familyGuid]?.[0] diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index d302adc7f4..3f042d7d47 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1526,6 +1526,26 @@ export const getVariantMainTranscript = ({ transcripts = {}, mainTranscriptId, s Object.values(transcripts), ).find(({ transcriptId }) => transcriptId === (selectedMainTranscriptId || mainTranscriptId)) || {} +export const getVariantSummary = (variant, individualGuid) => { + const { alt, ref, chrom, pos, end, genomeVersion } = variant + const mainTranscript = getVariantMainTranscript(variant) + let consequence = `${(mainTranscript.majorConsequence || '').replace(/_variant/g, '').replace(/_/g, ' ')} variant` + let variantDetail = [(mainTranscript.hgvsc || '').split(':').pop(), (mainTranscript.hgvsp || '').split(':').pop()].filter(val => val).join('/') + const displayGenomeVersion = GENOME_VERSION_DISPLAY_LOOKUP[genomeVersion] || genomeVersion + let inheritance = '' + if (individualGuid) { + const genotype = (variant.genotypes || {})[individualGuid] || {} + inheritance = genotype.numAlt === 1 ? 
' heterozygous' : ' homozygous' + if (genotype.numAlt === -1) { + inheritance = ' copy number' + consequence = genotype.cn < 2 ? 'deletion' : 'duplication' + variantDetail = `CN=${genotype.cn}` + } + } + const position = ref ? `${pos} ${ref}>${alt}` : `${pos}-${end}` + return `a${inheritance} ${consequence} ${chrom}:${position}${displayGenomeVersion ? ` (${displayGenomeVersion})` : ''}${variantDetail ? ` (${variantDetail})` : ''}` +} + const getPopAf = population => (variant) => { const populationData = (variant.populations || {})[population] return (populationData || {}).af From 7522c32ba9946b69af1cb131d23fcbef2336dece Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Aug 2024 16:59:32 -0400 Subject: [PATCH 576/736] actual url --- ui/pages/SummaryData/components/VariantLookup.jsx | 3 ++- ui/pages/SummaryData/reducers.js | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index 8038a871e6..c67d8a7ffc 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -15,6 +15,7 @@ import FamilyVariantTags from 'shared/components/panel/variants/FamilyVariantTag import Variants, { Variant, StyledVariantRow } from 'shared/components/panel/variants/Variants' import { FamilyVariantIndividuals } from 'shared/components/panel/variants/VariantIndividuals' import { GENOME_VERSION_FIELD } from 'shared/utils/constants' +import { sendVlmContactEmail } from '../reducers' import { geVlmDefaultContactEmailByFamily } from '../selectors' const FIELDS = [ @@ -48,7 +49,7 @@ const mapContactStateToProps = (state, ownProps) => { } const mapContactDispatchToProps = { - onSubmit: console.log, + onSubmit: sendVlmContactEmail, } const ContactButton = connect(mapContactStateToProps, mapContactDispatchToProps)(SendEmailButton) diff --git a/ui/pages/SummaryData/reducers.js b/ui/pages/SummaryData/reducers.js index 
761f79b87e..abbcc93948 100644 --- a/ui/pages/SummaryData/reducers.js +++ b/ui/pages/SummaryData/reducers.js @@ -78,6 +78,10 @@ export const updateExternalAnalysis = values => dispatch => new HttpRequestHelpe }, ).post(values) +export const sendVlmContactEmail = values => () => new HttpRequestHelper( + '/api/summary_data/send_email', +).post(values) + export const reducers = { successStoryLoading: loadingReducer(REQUEST_SUCCESS_STORY, RECEIVE_SUCCESS_STORY), successStoryRows: createSingleValueReducer(RECEIVE_SUCCESS_STORY, []), From 560bfd6d9329b018c66b403e103b473cc5f14d25 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Aug 2024 17:14:27 -0400 Subject: [PATCH 577/736] add link --- ui/pages/SummaryData/selectors.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/pages/SummaryData/selectors.js b/ui/pages/SummaryData/selectors.js index f714cfcda4..04b2e09f85 100644 --- a/ui/pages/SummaryData/selectors.js +++ b/ui/pages/SummaryData/selectors.js @@ -21,8 +21,8 @@ export const geVlmDefaultContactEmailByFamily = createSelector( const gene = genesById[getVariantMainGeneId(variant)]?.geneSymbol const defaultEmail = { subject: `${gene || variant.variantId} variant match in seqr`, - // TODO c. and p. 
- body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring ${getVariantSummary(variant)} in ${gene || 'no genes'}.\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}`, + // + body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring ${getVariantSummary(variant)} in ${gene || 'no genes'} (${window.location.href}).\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}`, } return (variant.lookupFamilyGuids || []).reduce((acc, familyGuid) => { const individual = individualsByFamily[familyGuid]?.[0] From 2fcd139026aa62a390f634d7b08159fa6f6a2187 Mon Sep 17 00:00:00 2001 From: Cas Simons Date: Tue, 13 Aug 2024 11:28:54 +1200 Subject: [PATCH 578/736] Missed one AIP>Talos --- seqr/views/apis/summary_data_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 1dc09c7219..c4997bed1b 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -141,7 +141,7 @@ def hpo_summary_data(request, hpo_id): return create_json_response({'data': list(data)}) -AIP_INGEST_FULL_REPORT_DESC = 'CPG: Full AIP report' +AIP_INGEST_FULL_REPORT_DESC = 'CPG: Full Talos report' @analyst_required From c8d6f6f1195c7cba408c1352fb95f0891a774a49 Mon Sep 17 00:00:00 2001 From: Cas Simons Date: Tue, 13 Aug 2024 15:22:49 +1200 Subject: [PATCH 579/736] Fix to old upstream merge conflict --- seqr/views/apis/summary_data_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index c4997bed1b..fd19991280 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -280,7 +280,7 @@ def _search_new_saved_variants(family_variant_ids: list[FamilyVariantKey], user: 
search_variants_by_id = {} warnings.append(str(e)) - new_variants = [] + new_variants = {} missing = defaultdict(list) for variant_id, family_ids in variant_families.items(): variant = search_variants_by_id.get(variant_id) or {'familyGuids': []} From 734c0415704add823fe35efc50c906a75d3a6efa Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Aug 2024 11:44:34 -0400 Subject: [PATCH 580/736] send email --- seqr/urls.py | 3 ++- seqr/utils/communication_utils.py | 10 +++++++--- seqr/views/apis/summary_data_api.py | 30 +++++++++++++++++++++++++++-- settings.py | 1 + ui/pages/SummaryData/reducers.js | 2 +- 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/seqr/urls.py b/seqr/urls.py index 2cdce9c593..d49755881b 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -130,7 +130,7 @@ gregor_export, \ seqr_stats from seqr.views.apis.summary_data_api import success_story, saved_variants_page, mme_details, hpo_summary_data, \ - bulk_update_family_external_analysis, individual_metadata + bulk_update_family_external_analysis, individual_metadata, send_vlm_email from seqr.views.apis.superuser_api import get_all_users from seqr.views.apis.awesomebar_api import awesomebar_autocomplete_handler @@ -345,6 +345,7 @@ 'summary_data/matchmaker': mme_details, 'summary_data/update_external_analysis': bulk_update_family_external_analysis, 'summary_data/individual_metadata/(?P[^/]+)': individual_metadata, + 'summary_data/send_vlm_email': send_vlm_email, 'create_project_from_workspace/(?P[^/]+)/(?P[^/]+)/grant_access': grant_workspace_access, 'create_project_from_workspace/(?P[^/]+)/(?P[^/]+)/validate_vcf': validate_anvil_vcf, diff --git a/seqr/utils/communication_utils.py b/seqr/utils/communication_utils.py index 9baf6b6486..ef4291065d 100644 --- a/seqr/utils/communication_utils.py +++ b/seqr/utils/communication_utils.py @@ -67,9 +67,13 @@ def send_project_notification(project, notification, email, subject): def _set_bulk_notification_stream(message): - message.esp_extra = { - 
'MessageStream': 'seqr-notifications', - } + set_email_message_stream(message, 'seqr-notifications') # Use batch API: emails are all sent with a single request and each recipient sees only their own email address message.merge_data = {} + +def set_email_message_stream(message, stream): + message.esp_extra = { + 'MessageStream': stream, + } + diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index e55cca3b10..4c480f5afb 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -1,6 +1,7 @@ from collections import defaultdict from datetime import datetime from django.core.exceptions import PermissionDenied +from django.core.mail.message import EmailMessage from django.contrib.auth.models import User from django.db.models import CharField, F, Value from django.db.models.functions import Coalesce, Concat, JSONObject, NullIf @@ -14,18 +15,21 @@ from seqr.models import Project, Family, Individual, VariantTagType, SavedVariant, FamilyAnalysedBy from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.file_utils import load_uploaded_file -from seqr.utils.communication_utils import safe_post_to_slack +from seqr.utils.communication_utils import safe_post_to_slack, set_email_message_stream from seqr.utils.gene_utils import get_genes from seqr.utils.middleware import ErrorsWarningsException from seqr.utils.search.utils import get_variants_for_variant_ids from seqr.views.utils.json_utils import create_json_response +from seqr.utils.logging_utils import SeqrLogger from seqr.views.utils.orm_to_json_utils import get_json_for_matchmaker_submissions, get_json_for_saved_variants,\ add_individual_hpo_details, INDIVIDUAL_DISPLAY_NAME_EXPR, AIP_TAG_TYPE from seqr.views.utils.permissions_utils import analyst_required, user_is_analyst, get_project_guids_user_can_view, \ login_and_policies_required, get_project_and_check_permissions, get_internal_projects from 
seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, DISCOVERY_ROW_TYPE from seqr.views.utils.variant_utils import get_variants_response, bulk_create_tagged_variants, DISCOVERY_CATEGORY -from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL +from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, VLM_SEND_EMAIL + +logger = SeqrLogger(__name__) MAX_SAVED_VARIANTS = 10000 @@ -366,3 +370,25 @@ def _get_airtable_collaborator_names(user, collaborator_ids): collaborator_id: collaborator_map.get(collaborator_id, {}).get('CollaboratorID') for collaborator_id in collaborator_ids } + + +@login_and_policies_required +def send_vlm_email(request): + request_json = json.loads(request.body) + email_message = EmailMessage( + subject=request_json['subject'], + body=request_json['body'], + bcc=[request_json['to']], + cc=[request.user.email], + reply_to=[request.user.email], + to=[VLM_SEND_EMAIL], + from_email=VLM_SEND_EMAIL, + ) + set_email_message_stream(email_message, 'vlm') + + try: + email_message.send() + except Exception as e: + logger.error(f'VLM Email Error: {e}', request.user, detail=request_json) + + return create_json_response({'success': True}) diff --git a/settings.py b/settings.py index 099c717f5e..abfe408e7b 100644 --- a/settings.py +++ b/settings.py @@ -355,6 +355,7 @@ MME_DEFAULT_CONTACT_HREF = 'mailto:{}'.format(MME_DEFAULT_CONTACT_EMAIL) VLM_DEFAULT_CONTACT_EMAIL = 'vlm@broadinstitute.org' +VLM_SEND_EMAIL = 'vlm-noreply@broadinstitute.org' MME_CONFIG_DIR = os.environ.get('MME_CONFIG_DIR', '') MME_NODES = {} diff --git a/ui/pages/SummaryData/reducers.js b/ui/pages/SummaryData/reducers.js index abbcc93948..a3111e8086 100644 --- a/ui/pages/SummaryData/reducers.js +++ b/ui/pages/SummaryData/reducers.js @@ -79,7 +79,7 @@ export const updateExternalAnalysis = values => dispatch => new HttpRequestHelpe ).post(values) export const sendVlmContactEmail = values => () => new 
HttpRequestHelper( - '/api/summary_data/send_email', + '/api/summary_data/send_vlm_email', ).post(values) export const reducers = { From 523ca959479dee0537802c2da66c73095ec4a7b4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Aug 2024 11:52:48 -0400 Subject: [PATCH 581/736] fix tests --- seqr/views/apis/summary_data_api.py | 2 +- seqr/views/apis/variant_search_api_tests.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 4c480f5afb..775562313d 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -378,7 +378,7 @@ def send_vlm_email(request): email_message = EmailMessage( subject=request_json['subject'], body=request_json['body'], - bcc=[request_json['to']], + bcc=[s.strip() for s in request_json['to'].split(',')], cc=[request.user.email], reply_to=[request.user.email], to=[VLM_SEND_EMAIL], diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 3acd45aa89..926368947a 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -823,18 +823,22 @@ def test_variant_lookup(self, mock_variant_lookup): 'I0_F0_1-10439-AC-A': { 'affected': 'N', 'familyGuid': 'F0_1-10439-AC-A', 'features': [], 'individualGuid': 'I0_F0_1-10439-AC-A', 'sex': 'F', + 'vlmContactEmail': 'test@broadinstitute.org,vlm@broadinstitute.org', }, 'I0_F1_1-10439-AC-A': { 'affected': 'A', 'familyGuid': 'F1_1-10439-AC-A', 'individualGuid': 'I0_F1_1-10439-AC-A', 'sex': 'M', 'features': [{'category': 'HP:0001626', 'label': '1 terms'}, {'category': 'Other', 'label': '1 terms'}], + 'vlmContactEmail': 'seqr-test@gmail.com,test@broadinstitute.org', }, 'I1_F0_1-10439-AC-A': { 'affected': 'N', 'familyGuid': 'F0_1-10439-AC-A', 'features': [], 'individualGuid': 'I1_F0_1-10439-AC-A', 'sex': 'M', + 'vlmContactEmail': 'test@broadinstitute.org,vlm@broadinstitute.org', }, 
'I2_F0_1-10439-AC-A': { 'affected': 'A', 'familyGuid': 'F0_1-10439-AC-A', 'individualGuid': 'I2_F0_1-10439-AC-A', 'sex': 'F', 'features': [{'category': 'HP:0000707', 'label': '1 terms'}, {'category': 'HP:0001626', 'label': '1 terms'}], + 'vlmContactEmail': 'test@broadinstitute.org,vlm@broadinstitute.org', }, }, 'variants': [expected_variant], From de0191992470053733dcf688d618176f4c1cde40 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Aug 2024 12:19:56 -0400 Subject: [PATCH 582/736] add tests --- seqr/views/apis/summary_data_api_tests.py | 46 ++++++++++++++++++++++- seqr/views/utils/test_utils.py | 9 +++-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index cc80bc86fb..becb09ee9b 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -6,7 +6,7 @@ import responses from seqr.views.apis.summary_data_api import mme_details, success_story, saved_variants_page, hpo_summary_data, \ - bulk_update_family_external_analysis, individual_metadata + bulk_update_family_external_analysis, individual_metadata, send_vlm_email from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase, AirtableTest, PARSED_VARIANTS from seqr.models import FamilyAnalysedBy, SavedVariant, VariantTag from settings import AIRTABLE_URL @@ -715,6 +715,50 @@ def test_sample_metadata_export(self, mock_google_authenticated): response = self.client.get(f'{gregor_projects_url}?includeAirtable=true') self._has_expected_metadata_response(response, multi_project_individuals, has_airtable=True, has_duplicate=True) + @mock.patch('seqr.views.apis.summary_data_api.EmailMessage') + def test_send_vlm_email(self, mock_email): + url = reverse(send_vlm_email) + self.check_require_login(url) + + self.reset_logs() + body = { + 'to': 'test@test.com , other_test@gmail.com', + 'body': 'some email content', + 'subject': 'some email 
subject' + } + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self._assert_expected_vlm_email(response, mock_email) + + self.reset_logs() + mock_email.return_value.send.side_effect = Exception('Send failed') + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self._assert_expected_vlm_email(response, mock_email, additional_logs=[ + ('VLM Email Error: Send failed', { + 'severity': 'ERROR', + '@type': 'type.googleapis.com/google.devtools.clouderrorreporting.v1beta1.ReportedErrorEvent', + 'detail': body, + }), + ]) + + def _assert_expected_vlm_email(self, response, mock_email, additional_logs=None): + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'success': True}) + + mock_email.assert_called_with( + subject='some email subject', + body='some email content', + bcc=['test@test.com', 'other_test@gmail.com'], + cc=['test_user_no_access@test.com'], + reply_to=['test_user_no_access@test.com'], + to=['vlm-noreply@broadinstitute.org'], + from_email='vlm-noreply@broadinstitute.org') + self.assertDictEqual(mock_email.return_value.esp_extra, {'MessageStream': 'vlm'}) + mock_email.return_value.send.assert_called() + + self.assert_json_logs(self.no_access_user, (additional_logs or []) + [ + (None, {'httpRequest': mock.ANY, 'requestBody': mock.ANY}) + ]) + # Tests for AnVIL access disabled class LocalSummaryDataAPITest(AuthenticationTestCase, SummaryDataAPITest): diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 22e16195d1..568c68734b 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -256,9 +256,12 @@ def assert_json_logs(self, user, expected): extra = extra or {} validate = extra.pop('validate', None) log_value = json.loads(logs[i]) - self.assertDictEqual(log_value, { - 'timestamp': mock.ANY, 'severity': 'INFO', 'user': user.email, 'message': message, **extra, - }) + expected_log = { + 
'timestamp': mock.ANY, 'severity': 'INFO', 'user': user.email, **extra, + } + if message is not None: + expected_log['message'] = message + self.assertDictEqual(log_value, expected_log) if validate: validate(log_value) From 389c783d9103649627ba31194686d9f0f7a040a6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Aug 2024 14:24:36 -0400 Subject: [PATCH 583/736] add new tag --- CHANGELOG.md | 1 + ...ariantfunctionaldata_functional_data_tag.py | 18 ++++++++++++++++++ seqr/models.py | 5 +++++ .../panel/view-fields/TagFieldView.jsx | 1 + 4 files changed, 25 insertions(+) create mode 100644 seqr/migrations/0072_alter_variantfunctionaldata_functional_data_tag.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 57532e5186..498245bdde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Add "Validated Name" functional tag (REQUIRES DB MIGRATION) ## 8/9/24 * Update directory structure for search backend diff --git a/seqr/migrations/0072_alter_variantfunctionaldata_functional_data_tag.py b/seqr/migrations/0072_alter_variantfunctionaldata_functional_data_tag.py new file mode 100644 index 0000000000..28cbd69544 --- /dev/null +++ b/seqr/migrations/0072_alter_variantfunctionaldata_functional_data_tag.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.13 on 2024-08-13 18:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0071_igvsample_index_file_path'), + ] + + operations = [ + migrations.AlterField( + model_name='variantfunctionaldata', + name='functional_data_tag', + field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of 
interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": "Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', 
(('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"metadata_title": "HPO Terms", "description": "Variant is believed to be part of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}'), ('Validated Name', '{"description": "Variant name which differs from the computed name.", "color": "#0E7694", "metadata_title": "Name"}')))]), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 56153623b4..78fc0e3b11 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -968,6 +968,11 @@ class VariantFunctionalData(ModelWithGUID): 'description': 'Variant is believed to be part of the solve, explaining only some of the phenotypes.', 'color': '#1F42D9', })), + ('Validated Name', json.dumps({ + 'description': 'Variant name which differs from the computed name.', + 'color': '#0E7694', + 'metadata_title': 'Name', + })), )), ) diff --git a/ui/shared/components/panel/view-fields/TagFieldView.jsx b/ui/shared/components/panel/view-fields/TagFieldView.jsx index ab492b93e1..3fc7b916e3 100644 --- a/ui/shared/components/panel/view-fields/TagFieldView.jsx +++ b/ui/shared/components/panel/view-fields/TagFieldView.jsx @@ -45,6 +45,7 @@ const LIST_FORMAT_PROPS = { const METADATA_FIELD_PROPS = { [NOTES_METADATA_TITLE]: { width: 16, maxLength: 50, placeholder: 'Enter up to 50 characters' }, + Name: { width: 16, maxLength: 100, placeholder: 'Enter up to 100 characters' }, Reason: { 
width: 16, maxLength: 50, placeholder: 'Brief reason for excluding. Enter up to 50 characters' }, 'Test Type(s)': { width: 16, From 34a80c6e1854ae891806bbdbe110c203d9ccb815 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Aug 2024 14:41:30 -0400 Subject: [PATCH 584/736] pull validated name for reports --- seqr/views/utils/anvil_metadata_utils.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 27a07730ce..b11c7346f9 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -267,7 +267,10 @@ def _get_nested_variant_name(v): def _get_sv_name(variant_json, pop_sv_name=True): + validated_sv_name = variant_json.pop('validated_sv_name', None) sv_name = variant_json.pop('svName', None) if pop_sv_name else variant_json.get('svName') + if validated_sv_name: + return validated_sv_name[0] if variant_json.get('svType'): return sv_name or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json) return None @@ -324,13 +327,18 @@ def _get_parsed_saved_discovery_variants_by_family( ): tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY) - project_saved_variants = SavedVariant.objects.filter( - varianttag__variant_tag_type__in=tag_types, family__id__in=families, - **({} if include_svs else {'alt__isnull': False}), - ).order_by('created_date').distinct().annotate( + annotations = dict( tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True), partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')), ) + if include_svs: + annotations['validated_sv_name'] = ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Validated Name')) + variant_attr_fields = ['validated_sv_name'] + (variant_attr_fields or 
[]) + + project_saved_variants = SavedVariant.objects.filter( + varianttag__variant_tag_type__in=tag_types, family__id__in=families, + **({} if include_svs else {'alt__isnull': False}), + ).order_by('created_date').distinct().annotate(**annotations) variants = [] gene_ids = set() From 570b7af83cc1c752d66d441777c48fee10212265 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Aug 2024 14:54:47 -0400 Subject: [PATCH 585/736] update tests --- seqr/fixtures/report_variants.json | 13 +++++++++++++ seqr/views/apis/report_api_tests.py | 4 ++-- seqr/views/apis/summary_data_api_tests.py | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/seqr/fixtures/report_variants.json b/seqr/fixtures/report_variants.json index aaa821eceb..e0722385b4 100644 --- a/seqr/fixtures/report_variants.json +++ b/seqr/fixtures/report_variants.json @@ -168,5 +168,18 @@ "functional_data_tag": "Partial Phenotype Contribution", "metadata": "Uncertain" } +}, +{ + "model": "seqr.variantfunctionaldata", + "pk": 31, + "fields": { + "guid": "VFD0000031_prefix_19107_DEL_r0", + "created_date": "2018-07-24T15:34:01.353Z", + "created_by": null, + "last_modified_date": "2024-07-24T15:34:01.365Z", + "saved_variants": [7], + "functional_data_tag": "Validated Name", + "metadata": "DEL:chr1:249045123-249045456" + } } ] diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index b236343669..4652f435fe 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1195,7 +1195,7 @@ def test_family_metadata(self): 'other_individual_ids': 'NA20870; NA20888', 'individual_count': 3, 'family_structure': 'other', - 'genes': 'DEL:chr1:249045487-249045898; OR4G11P', + 'genes': 'DEL:chr1:249045123-249045456; OR4G11P', 'pmid_id': None, 'phenotype_description': None, 'analysisStatus': 'Q', @@ -1409,7 +1409,7 @@ def test_variant_metadata(self): 'internal_project_id': 'Test Reprocessed Project', 'ref': None, 'svType': 'DEL', - 'sv_name': 
'DEL:chr1:249045487-249045898', + 'sv_name': 'DEL:chr1:249045123-249045456', 'tags': ['Tier 1 - Novel gene and phenotype'], 'variant_inheritance': 'unknown', 'variant_reference_assembly': 'GRCh37', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index cc80bc86fb..adc752cc81 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -50,7 +50,7 @@ "zygosity-2": "Heterozygous", "ref-1": "TC", "svType-2": "DEL", - "sv_name-2": "DEL:chr1:249045487-249045898", + "sv_name-2": "DEL:chr1:249045123-249045456", "chrom-2": "1", "pos-2": 249045487, 'end-2': 249045898, From cd37d9af498fce930181c27f667e61186b2663d2 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 14 Aug 2024 10:22:42 -0400 Subject: [PATCH 586/736] remove ONT from search backend --- .github/workflows/hail-search-unit-tests.yaml | 1 - hail_search/queries/multi_data_types.py | 5 -- hail_search/queries/ont_snv_indel.py | 31 ------- hail_search/test_search.py | 85 ------------------- 4 files changed, 122 deletions(-) delete mode 100644 hail_search/queries/ont_snv_indel.py diff --git a/.github/workflows/hail-search-unit-tests.yaml b/.github/workflows/hail-search-unit-tests.yaml index 016e4e4382..7539aa5c8a 100644 --- a/.github/workflows/hail-search-unit-tests.yaml +++ b/.github/workflows/hail-search-unit-tests.yaml @@ -28,7 +28,6 @@ jobs: - name: Run coverage tests run: | export DATASETS_DIR=./hail_search/fixtures - export ONT_ENABLED=true export MACHINE_MEM=24 export JAVA_OPTS_XSS=16M coverage run --source="./hail_search" --omit="./hail_search/__main__.py","./hail_search/test_utils.py" -m pytest hail_search/ diff --git a/hail_search/queries/multi_data_types.py b/hail_search/queries/multi_data_types.py index e346cdc8f2..7e519619e1 100644 --- a/hail_search/queries/multi_data_types.py +++ b/hail_search/queries/multi_data_types.py @@ -8,13 +8,8 @@ from hail_search.queries.snv_indel_37 import SnvIndelHailTableQuery37 from 
hail_search.queries.sv import SvHailTableQuery from hail_search.queries.gcnv import GcnvHailTableQuery -from hail_search.queries.ont_snv_indel import OntSnvIndelHailTableQuery - -ONT_ENABLED = os.environ.get('ONT_ENABLED') QUERY_CLASSES = [SnvIndelHailTableQuery, SnvIndelHailTableQuery37, MitoHailTableQuery, SvHailTableQuery, GcnvHailTableQuery] -if ONT_ENABLED: - QUERY_CLASSES.append(OntSnvIndelHailTableQuery) QUERY_CLASS_MAP = {(cls.DATA_TYPE, cls.GENOME_VERSION): cls for cls in QUERY_CLASSES} SNV_INDEL_DATA_TYPE = SnvIndelHailTableQuery.DATA_TYPE diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py deleted file mode 100644 index c84e22736a..0000000000 --- a/hail_search/queries/ont_snv_indel.py +++ /dev/null @@ -1,31 +0,0 @@ -from aiohttp.web import HTTPBadRequest - -from hail_search.constants import EXTENDED_SPLICE_KEY, UTR_ANNOTATOR_KEY, SCREEN_KEY -from hail_search.queries.base import BaseHailTableQuery -from hail_search.queries.snv_indel import SnvIndelHailTableQuery -from hail_search.queries.snv_indel_37 import SnvIndelHailTableQuery37 - - -class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery): - - DATA_TYPE = 'ONT_SNV_INDEL' - - CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS - ANNOTATION_OVERRIDE_FIELDS = SnvIndelHailTableQuery37.ANNOTATION_OVERRIDE_FIELDS + [SCREEN_KEY] - - def _get_loaded_filter_ht(self, *args, **kwargs): - return None - - def _add_project_lookup_data(self, *args, **kwargs): - raise HTTPBadRequest(reason='Variant lookup is not supported for ONT data') - - def _get_allowed_consequence_ids(self, annotations): - return super()._get_allowed_consequence_ids({ - k: v for k, v in annotations.items() if k not in {EXTENDED_SPLICE_KEY, UTR_ANNOTATOR_KEY} - }) - - @staticmethod - def _get_allowed_transcripts_filter(allowed_consequence_ids): - return SnvIndelHailTableQuery37._get_allowed_transcripts_filter( - allowed_consequence_ids.get(SnvIndelHailTableQuery37.TRANSCRIPT_CONSEQUENCE_FIELD) - ) diff --git 
a/hail_search/test_search.py b/hail_search/test_search.py index 9e0f64033e..429bef701b 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -271,91 +271,6 @@ async def test_single_family_search(self): await self._assert_expected_search( [GRCH37_VARIANT], genome_version='GRCh37', sample_data=FAMILY_2_VARIANT_SAMPLE_DATA) - await self._assert_expected_search([{ - 'variantId': '1-8403825-CTTTTTTTT-C', - 'xpos': 1008403825, - 'chrom': '1', - 'pos': 8403825, - 'ref': 'CTTTTTTTT', - 'alt': 'C', - 'genomeVersion': '38', - 'liftedOverGenomeVersion': '37', - 'liftedOverChrom': '1', - 'liftedOverPos': 8463885, - 'familyGuids': ['F000002_2'], - 'genotypes': { - 'I000004_hg00731': { - 'sampleId': 'HG00731', 'sampleType': 'WGS', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2', - 'numAlt': 1, 'dp': 21, 'gq': 3, 'ab': 0.6190476190476191, - }, 'I000005_hg00732': { - 'sampleId': 'HG00732', 'sampleType': 'WGS', 'individualGuid': 'I000005_hg00732', 'familyGuid': 'F000002_2', - 'numAlt': 0, 'dp': 0, 'gq': 13, 'ab': None, - }, 'I000006_hg00733': { - 'sampleId': 'HG00733', 'sampleType': 'WGS', 'individualGuid': 'I000006_hg00733', 'familyGuid': 'F000002_2', - 'numAlt': -1, 'dp': None, 'gq': 0, 'ab': None, - }, - }, - 'genotypeFilters': 'RefCall', - 'populations': { - 'seqr': {'af': 0.1666666716337204, 'ac': 2, 'an': 12, 'hom': 0}, - 'topmed': {'af': 0.0023385800886899233, 'ac': 619, 'an': 264690, 'hom': 11, 'het': 597}, - 'exac': {'af': 0.0, 'ac': 0, 'an': 0, 'hom': 0, 'hemi': 0, 'het': 0, 'filter_af': 0.0}, - 'gnomad_exomes': {'af': 0.0, 'ac': 0, 'an': 0, 'hom': 0, 'hemi': 0, 'filter_af': 0.0}, - 'gnomad_genomes': {'af': 0.002653343603014946, 'ac': 188, 'an': 70854, 'hom': 2, 'hemi': 0, 'filter_af': 0.00288608786650002}, - }, - 'predictions': { - 'cadd': 0.6510000228881836, 'eigen': None, 'fathmm': None, 'gnomad_noncoding': None, 'mpc': None, - 'mut_pred': None, 'primate_ai': None, 'splice_ai': None, 'splice_ai_consequence': None, 'vest': None, - 
'mut_taster': None, 'polyphen': None, 'revel': None, 'sift': None, - }, - 'screenRegionType': None, - 'clinvar': None, - 'hgmd': None, - 'transcripts': { - 'ENSG00000142599': [ - {'aminoAcids': None, 'canonical': 1, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000337907.7:c.1284+18894_1284+18901del', 'hgvsp': None, - 'transcriptId': 'ENST00000337907', 'isLofNagnag': None, 'transcriptRank': 0, - 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000377464.5:c.480+18894_480+18901del', 'hgvsp': None, - 'transcriptId': 'ENST00000377464', 'isLofNagnag': None, 'transcriptRank': 1, - 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000400907.6:c.1284+18894_1284+18901del', 'hgvsp': None, - 'transcriptId': 'ENST00000400907', 'isLofNagnag': None, 'transcriptRank': 2, - 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000400908.6:c.1284+18894_1284+18901del', 'hgvsp': None, - 'transcriptId': 'ENST00000400908', 'isLofNagnag': None, 'transcriptRank': 3, - 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000476556.5:c.-379+18894_-379+18901del', 'hgvsp': None, - 'transcriptId': 'ENST00000476556', 'isLofNagnag': None, 'transcriptRank': 4, - 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000488215.5:c.-379+18894_-379+18901del', 'hgvsp': None, - 
'transcriptId': 'ENST00000488215', 'isLofNagnag': None, 'transcriptRank': 5, - 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000460659.5:n.334+18894_334+18901del', 'hgvsp': None, - 'transcriptId': 'ENST00000460659', 'isLofNagnag': None, 'transcriptRank': 6, - 'biotype': 'processed_transcript', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000465125.1:n.301+18894_301+18901del', 'hgvsp': None, - 'transcriptId': 'ENST00000465125', 'isLofNagnag': None, 'transcriptRank': 7, - 'biotype': 'processed_transcript', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000142599', - 'hgvsc': 'ENST00000492766.5:n.268+18894_268+18901del', 'hgvsp': None, - 'transcriptId': 'ENST00000492766', 'isLofNagnag': None, 'transcriptRank': 8, - 'biotype': 'processed_transcript', 'lofFilters': None, 'majorConsequence': 'intron_variant'}, - ], - }, - 'mainTranscriptId': 'ENST00000337907', - 'selectedMainTranscriptId': None, - '_sort': [1008403825], - }], sample_data={'ONT_SNV_INDEL': FAMILY_2_VARIANT_SAMPLE_DATA['SNV_INDEL']}) - async def test_single_project_search(self): variant_gene_counts = { 'ENSG00000097046': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}}, From 8aa6377bf0b77040097f656e10dcd2a922641431 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 14 Aug 2024 10:29:29 -0400 Subject: [PATCH 587/736] remove ONT as dataset_type choice --- CHANGELOG.md | 1 + .../reload_saved_variant_annotations_tests.py | 2 +- .../0072_alter_sample_dataset_type.py | 18 ++++++++++++++++++ seqr/models.py | 1 - 4 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 seqr/migrations/0072_alter_sample_dataset_type.py diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 57532e5186..f3d549f19c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Remove ONT support (REQUIRES DB MIGRATION) ## 8/9/24 * Update directory structure for search backend diff --git a/seqr/management/tests/reload_saved_variant_annotations_tests.py b/seqr/management/tests/reload_saved_variant_annotations_tests.py index b85231c901..81cdb7ae5c 100644 --- a/seqr/management/tests/reload_saved_variant_annotations_tests.py +++ b/seqr/management/tests/reload_saved_variant_annotations_tests.py @@ -31,7 +31,7 @@ def test_command(self, mock_logger): with self.assertRaises(CommandError) as ce: call_command('reload_saved_variant_annotations', 'SV', 'GRCh37') - self.assertEqual(str(ce.exception), "Error: argument data_type: invalid choice: 'SV' (choose from 'MITO', 'ONT_SNV_INDEL', 'SNV_INDEL', 'SV_WES', 'SV_WGS')") + self.assertEqual(str(ce.exception), "Error: argument data_type: invalid choice: 'SV' (choose from 'MITO', 'SNV_INDEL', 'SV_WES', 'SV_WGS')") # Test success call_command('reload_saved_variant_annotations', 'SNV_INDEL', 'GRCh37') diff --git a/seqr/migrations/0072_alter_sample_dataset_type.py b/seqr/migrations/0072_alter_sample_dataset_type.py new file mode 100644 index 0000000000..15d5b76083 --- /dev/null +++ b/seqr/migrations/0072_alter_sample_dataset_type.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.13 on 2024-08-14 14:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0071_igvsample_index_file_path'), + ] + + operations = [ + migrations.AlterField( + model_name='sample', + name='dataset_type', + field=models.CharField(choices=[('SNV_INDEL', 'Variant Calls'), ('SV', 'SV Calls'), ('MITO', 'Mitochondria calls')], max_length=13), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 56153623b4..2fe045f21c 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -700,7 +700,6 @@ class Sample(ModelWithGUID): 
(DATASET_TYPE_VARIANT_CALLS, 'Variant Calls'), (DATASET_TYPE_SV_CALLS, 'SV Calls'), (DATASET_TYPE_MITO_CALLS, 'Mitochondria calls'), - ('ONT_SNV_INDEL', 'ONT Calls'), ) DATASET_TYPE_LOOKUP = dict(DATASET_TYPE_CHOICES) From 5f0bd944e503ba7af00dcaac4b02f316922d958a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 14 Aug 2024 10:51:30 -0400 Subject: [PATCH 588/736] consistant link to vcf documentation in FAQ --- ui/pages/Public/components/Faq.jsx | 5 +++-- ui/shared/components/panel/LoadWorkspaceDataForm.jsx | 3 +-- ui/shared/utils/constants.js | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ui/pages/Public/components/Faq.jsx b/ui/pages/Public/components/Faq.jsx index 5bed4f052c..ea4ad40438 100644 --- a/ui/pages/Public/components/Faq.jsx +++ b/ui/pages/Public/components/Faq.jsx @@ -6,6 +6,7 @@ import { Header, Segment, List, Icon } from 'semantic-ui-react' import { WORKSPACE_REQUIREMENTS } from 'shared/components/panel/LoadWorkspaceDataForm' import { ActiveDisabledNavLink } from 'shared/components/StyledComponents' +import { VCF_DOCUMENTATION_URL } from 'shared/utils/constants' import { SeqrAvailability } from './LandingPage' const ENGLISH = 'en' @@ -193,7 +194,7 @@ const FAQS = [ regulatory approval is required to use seqr to analyze data stored on Terra or AnVIL.
To learn more about generating a joint called vcf, please refer to this   - + documentation
@@ -232,7 +233,7 @@ const FAQS = [
Para obtener más información sobre cómo generar un conjunto denominado vcf, consulte esta   - documentación + documentación
), }, diff --git a/ui/shared/components/panel/LoadWorkspaceDataForm.jsx b/ui/shared/components/panel/LoadWorkspaceDataForm.jsx index a7c17d57e0..3ea3ed4537 100644 --- a/ui/shared/components/panel/LoadWorkspaceDataForm.jsx +++ b/ui/shared/components/panel/LoadWorkspaceDataForm.jsx @@ -17,6 +17,7 @@ import { INDIVIDUAL_FIELD_SEX, INDIVIDUAL_FIELD_AFFECTED, SAMPLE_TYPE_OPTIONS, + VCF_DOCUMENTATION_URL, } from 'shared/utils/constants' import { validateUploadedFile } from 'shared/components/form/XHRUploaderField' import BulkUploadForm from 'shared/components/form/BulkUploadForm' @@ -28,8 +29,6 @@ import { RECEIVE_DATA } from 'redux/utils/reducerUtils' import { getAnvilLoadingDelayDate } from 'redux/selectors' import AnvilFileSelector from 'shared/components/form/AnvilFileSelector' -const VCF_DOCUMENTATION_URL = 'https://storage.googleapis.com/seqr-reference-data/seqr-vcf-info.pdf' - export const WORKSPACE_REQUIREMENTS = [ '"Writer" or "Owner" level access to the workspace', 'The "Can Share" permission enabled for the workspace', diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index d302adc7f4..21b632361b 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -21,6 +21,8 @@ export const ANVIL_URL = 'https://anvil.terra.bio' export const GOOGLE_LOGIN_URL = '/login/google-oauth2' export const LOCAL_LOGIN_URL = '/login' +export const VCF_DOCUMENTATION_URL = 'https://storage.googleapis.com/seqr-reference-data/seqr-vcf-info.pdf' + export const GENOME_VERSION_37 = '37' export const GENOME_VERSION_38 = '38' export const GENOME_VERSION_OPTIONS = [ From 79accf9cd03fbb4f85de7e6bfa55673b9186e26b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 14 Aug 2024 12:07:43 -0400 Subject: [PATCH 589/736] show sort when no search applied --- ui/pages/Report/components/CustomSearch.jsx | 1 + .../Search/components/VariantSearchForm.jsx | 4 +- ui/redux/rootReducer.js | 4 ++ .../panel/search/SearchDisplayForm.jsx | 60 +++++++++++++++++++ 
.../search/VariantSearchFormContainer.jsx | 15 +++-- .../panel/search/VariantSearchResults.jsx | 46 ++------------ 6 files changed, 84 insertions(+), 46 deletions(-) create mode 100644 ui/shared/components/panel/search/SearchDisplayForm.jsx diff --git a/ui/pages/Report/components/CustomSearch.jsx b/ui/pages/Report/components/CustomSearch.jsx index a3899dd733..9366267021 100644 --- a/ui/pages/Report/components/CustomSearch.jsx +++ b/ui/pages/Report/components/CustomSearch.jsx @@ -122,6 +122,7 @@ const CustomSearch = React.memo(({ match, history, loadContext, loading, searchP history={history} resultsPath="/report/custom_search" initialValues={searchParams} + match={match} > {configuredField(INCLUDE_ALL_PROJECTS_FIELD)} diff --git a/ui/pages/Search/components/VariantSearchForm.jsx b/ui/pages/Search/components/VariantSearchForm.jsx index 32025211b5..a88d8194f2 100644 --- a/ui/pages/Search/components/VariantSearchForm.jsx +++ b/ui/pages/Search/components/VariantSearchForm.jsx @@ -8,12 +8,13 @@ import { SaveSearchButton } from './SavedSearch' import VariantSearchFormContent from './VariantSearchFormContent' import { getIntitialSearch, getMultiProjectFamilies } from '../selectors' -const VariantSearchForm = React.memo(({ history, initialSearch, contentLoading, noEditProjects }) => ( +const VariantSearchForm = React.memo(({ history, initialSearch, contentLoading, noEditProjects, match }) => (
@@ -23,6 +24,7 @@ const VariantSearchForm = React.memo(({ history, initialSearch, contentLoading, VariantSearchForm.propTypes = { history: PropTypes.object, + match: PropTypes.object, initialSearch: PropTypes.object, contentLoading: PropTypes.bool, noEditProjects: PropTypes.bool, diff --git a/ui/redux/rootReducer.js b/ui/redux/rootReducer.js index 8053d84340..2bf6a8aae6 100644 --- a/ui/redux/rootReducer.js +++ b/ui/redux/rootReducer.js @@ -196,6 +196,10 @@ export const navigateSavedHashedSearch = (search, navigateSearch, resultsPath, h }) } +export const updateSearchSort = updates => (dispatch) => { + dispatch({ type: UPDATE_SEARCHED_VARIANT_DISPLAY, updates }) +} + export const loadSearchedVariants = ( { searchHash }, { displayUpdates, queryParams, updateQueryParams }, ) => (dispatch, getState) => { diff --git a/ui/shared/components/panel/search/SearchDisplayForm.jsx b/ui/shared/components/panel/search/SearchDisplayForm.jsx new file mode 100644 index 0000000000..612ab3eb56 --- /dev/null +++ b/ui/shared/components/panel/search/SearchDisplayForm.jsx @@ -0,0 +1,60 @@ +import React from 'react' +import PropTypes from 'prop-types' +import { connect } from 'react-redux' + +import { loadSearchedVariants, updateSearchSort } from 'redux/rootReducer' +import { + getTotalVariantsCount, + getVariantSearchDisplay, +} from 'redux/selectors' +import { VARIANT_SEARCH_SORT_FIELD, VARIANT_PAGINATION_FIELD } from '../../../utils/constants' +import FormWrapper from '../../form/FormWrapper' + +const FIELDS = [ + VARIANT_SEARCH_SORT_FIELD, +] + +const SearchDisplayForm = React.memo(({ + variantSearchDisplay, onSubmit, totalVariantsCount, formLocation, paginationOnly, +}) => { + const { recordsPerPage } = variantSearchDisplay + const paginationFields = (totalVariantsCount || 0) > recordsPerPage ? + [{ ...VARIANT_PAGINATION_FIELD, totalPages: Math.ceil(totalVariantsCount / recordsPerPage) }] : [] + const fields = paginationOnly ? 
paginationFields : [...FIELDS, ...paginationFields] + + return ( + + ) +}) + +SearchDisplayForm.propTypes = { + formLocation: PropTypes.string, + paginationOnly: PropTypes.bool, + onSubmit: PropTypes.func, + variantSearchDisplay: PropTypes.object, + totalVariantsCount: PropTypes.number, +} + +const mapStateToProps = (state, ownProps) => ({ + variantSearchDisplay: getVariantSearchDisplay(state), + totalVariantsCount: getTotalVariantsCount(state, ownProps), +}) + +const mapDispatchToProps = (dispatch, ownProps) => ({ + onSubmit: updates => ( + ownProps.searchOnSubmit ? dispatch(loadSearchedVariants(ownProps.match.params, { + displayUpdates: updates, + ...ownProps, + })) : dispatch(updateSearchSort(updates)) + ), +}) + +export default connect(mapStateToProps, mapDispatchToProps)(SearchDisplayForm) diff --git a/ui/shared/components/panel/search/VariantSearchFormContainer.jsx b/ui/shared/components/panel/search/VariantSearchFormContainer.jsx index 1fa9ef294c..41f52f86fe 100644 --- a/ui/shared/components/panel/search/VariantSearchFormContainer.jsx +++ b/ui/shared/components/panel/search/VariantSearchFormContainer.jsx @@ -1,6 +1,7 @@ import PropTypes from 'prop-types' import React from 'react' import { connect } from 'react-redux' +import { Segment } from 'semantic-ui-react' import createDecorator from 'final-form-calculate' import { navigateSavedHashedSearch } from 'redux/rootReducer' import { getSearchedVariantsErrorMessage, getSearchedVariantsIsLoading } from 'redux/selectors' @@ -8,6 +9,7 @@ import FormWrapper from 'shared/components/form/FormWrapper' import { toUniqueCsvString } from 'shared/utils/stringUtils' import { LOCUS_LIST_ITEMS_FIELD } from 'shared/utils/constants' +import SearchDisplayForm from './SearchDisplayForm' import { LOCUS_FIELD_NAME, PANEL_APP_FIELD_NAME } from './constants' const DECORATORS = [ @@ -22,9 +24,10 @@ const DECORATORS = [ ] const VariantSearchFormContainer = React.memo(( - { history, onSubmit, resultsPath, loading, variantsLoading, 
children, ...formProps }, -) => ( + { history, match, onSubmit, resultsPath, loading, variantsLoading, children, ...formProps }, +) => ([ {children} - -)) + , + !match.params.searchHash && ( + + ), +])) VariantSearchFormContainer.propTypes = { children: PropTypes.node, history: PropTypes.object.isRequired, + match: PropTypes.object, onSubmit: PropTypes.func, resultsPath: PropTypes.string, loading: PropTypes.bool, diff --git a/ui/shared/components/panel/search/VariantSearchResults.jsx b/ui/shared/components/panel/search/VariantSearchResults.jsx index e7fefd54e0..2bd36b7dff 100644 --- a/ui/shared/components/panel/search/VariantSearchResults.jsx +++ b/ui/shared/components/panel/search/VariantSearchResults.jsx @@ -13,14 +13,13 @@ import { getVariantSearchDisplay, getSearchedVariantExportConfig, } from 'redux/selectors' -import { VARIANT_SEARCH_SORT_FIELD, VARIANT_PAGINATION_FIELD } from '../../../utils/constants' import DataLoader from '../../DataLoader' import { QueryParamsEditor } from '../../QueryParamEditor' import { HorizontalSpacer } from '../../Spacers' import ExportTableButton from '../../buttons/ExportTableButton' -import FormWrapper from '../../form/FormWrapper' import Variants from '../variants/Variants' import GeneBreakdown from './GeneBreakdown' +import SearchDisplayForm from './SearchDisplayForm' const LargeRow = styled(Grid.Row)` font-size: 1.15em; @@ -32,10 +31,6 @@ const LargeRow = styled(Grid.Row)` const scrollToTop = () => window.scrollTo(0, 0) -const FIELDS = [ - VARIANT_SEARCH_SORT_FIELD, -] - export const DisplayVariants = React.memo(({ displayVariants, compoundHetToggle }) => ( @@ -50,15 +45,12 @@ DisplayVariants.propTypes = { } const BaseVariantSearchResultsContent = React.memo(({ - match, variantSearchDisplay, searchedVariantExportConfig, onSubmit, totalVariantsCount, additionalDisplayEdit, + match, variantSearchDisplay, searchedVariantExportConfig, totalVariantsCount, additionalDisplayEdit, displayVariants, compoundHetToggle, }) => { const { 
searchHash } = match.params const { page = 1, recordsPerPage } = variantSearchDisplay const variantDisplayPageOffset = (page - 1) * recordsPerPage - const paginationFields = totalVariantsCount > recordsPerPage ? - [{ ...VARIANT_PAGINATION_FIELD, totalPages: Math.ceil(totalVariantsCount / recordsPerPage) }] : [] - const fields = [...FIELDS, ...paginationFields] // eslint-disable-line react-perf/jsx-no-new-array-as-prop return [ @@ -69,15 +61,7 @@ const BaseVariantSearchResultsContent = React.memo(({ {additionalDisplayEdit} - + {searchedVariantExportConfig && 1000} />} @@ -87,15 +71,7 @@ const BaseVariantSearchResultsContent = React.memo(({ , - + @@ -106,7 +82,6 @@ const BaseVariantSearchResultsContent = React.memo(({ BaseVariantSearchResultsContent.propTypes = { match: PropTypes.object, - onSubmit: PropTypes.func, variantSearchDisplay: PropTypes.object, searchedVariantExportConfig: PropTypes.arrayOf(PropTypes.object), totalVariantsCount: PropTypes.number, @@ -123,18 +98,7 @@ const mapContentStateToProps = (state, ownProps) => ({ errorMessage: getSearchedVariantsErrorMessage(state), }) -const mapContentDispatchToProps = (dispatch, ownProps) => ({ - onSubmit: (updates) => { - dispatch(loadSearchedVariants(ownProps.match.params, { - displayUpdates: updates, - ...ownProps, - })) - }, -}) - -const VariantSearchResultsContent = connect( - mapContentStateToProps, mapContentDispatchToProps, -)(BaseVariantSearchResultsContent) +const VariantSearchResultsContent = connect(mapContentStateToProps)(BaseVariantSearchResultsContent) const BaseVariantSearchResults = React.memo(({ match, displayVariants, load, unload, initialLoad, variantsLoading, contextLoading, errorMessage, contentComponent, From 38f2721dff5d0fa43bb59e0d20e1a12d50f020d5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 14 Aug 2024 12:16:35 -0400 Subject: [PATCH 590/736] show sort dropdown on search error --- .../panel/search/VariantSearchResults.jsx | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 
deletions(-) diff --git a/ui/shared/components/panel/search/VariantSearchResults.jsx b/ui/shared/components/panel/search/VariantSearchResults.jsx index 2bd36b7dff..93ae1e81b8 100644 --- a/ui/shared/components/panel/search/VariantSearchResults.jsx +++ b/ui/shared/components/panel/search/VariantSearchResults.jsx @@ -100,6 +100,24 @@ const mapContentStateToProps = (state, ownProps) => ({ const VariantSearchResultsContent = connect(mapContentStateToProps)(BaseVariantSearchResultsContent) +const ErrorResults = ({ errorMessage, match }) => ([ + + + + + , + + + + + , +]) + +ErrorResults.propTypes = { + errorMessage: PropTypes.string, + match: PropTypes.object, +} + const BaseVariantSearchResults = React.memo(({ match, displayVariants, load, unload, initialLoad, variantsLoading, contextLoading, errorMessage, contentComponent, ...props @@ -112,13 +130,7 @@ const BaseVariantSearchResults = React.memo(({ unload={unload} initialLoad={initialLoad} reloadOnIdUpdate - errorMessage={errorMessage && ( - - - - - - )} + errorMessage={errorMessage && } > {React.createElement(contentComponent || VariantSearchResultsContent, { match, displayVariants, ...props })} From 5bb52e835d72f4c96a7a5de6b56c0d20c4038d67 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 14 Aug 2024 16:40:02 -0400 Subject: [PATCH 591/736] fix lookup project_samples --- hail_search/queries/mito.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 08c1af4bd6..b7ba456b05 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -333,8 +333,8 @@ def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): sample_type = 'WES' else: sample_type = 'WGS' - for family_guid in families: - families[family_guid] = {sample_type: True} + for family_guid, value in families.items(): + families[family_guid] = {sample_type: value} # Variant can be present in the lookup table with only ref calls, so is still not 
present in any projects if not variant_projects: From d8b35faa52ca97a1d1dadebc717c719212fdd51a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 14 Aug 2024 17:31:36 -0400 Subject: [PATCH 592/736] bump changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c046eb4d5e..ebd050ecc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # _seqr_ Changes ## dev + +## 8/14/24 * Remove ONT support (REQUIRES DB MIGRATION) * Add "Validated Name" functional tag (REQUIRES DB MIGRATION) From 3bf24c38a9534f04f2399ddd956756cd0f56dd75 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 10:09:26 -0400 Subject: [PATCH 593/736] remove unused kwarg --- hail_search/queries/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 63f7f62782..66b6334c95 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -339,7 +339,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ return ht, comp_het_ht - def import_filtered_table(self, project_samples, num_families, intervals=None, **kwargs): + def import_filtered_table(self, project_samples, num_families, **kwargs): if num_families == 1: family_sample_data = list(project_samples.values())[0] family_guid = list(family_sample_data.keys())[0] From 21a1780b8608fd16584ff0483c56e29e0f95cc6e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 10:21:30 -0400 Subject: [PATCH 594/736] actually remove deprectaed populations on header freq change --- ui/shared/components/panel/search/FrequencyFilter.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/search/FrequencyFilter.jsx b/ui/shared/components/panel/search/FrequencyFilter.jsx index 695aa0fcff..f759a7cbe4 100644 --- a/ui/shared/components/panel/search/FrequencyFilter.jsx +++ b/ui/shared/components/panel/search/FrequencyFilter.jsx @@ 
-147,7 +147,7 @@ const callsetChange = (onChange, initialValues) => val => onChange( const freqChange = (onChange, initialValues) => val => onChange(FREQUENCIES.reduce((acc, { name }) => ({ ...acc, [name]: name !== THIS_CALLSET_FREQUENCY && name !== SV_CALLSET_FREQUENCY ? val : initialValues[name], -}), initialValues || {})) +}), {})) export const HeaderFrequencyFilter = ({ value, onChange, esEnabled, ...props }) => { const { callset, sv_callset: svCallset, ...freqValues } = value || {} From fe9958ca4c5fb6219d7b7ec2fd724a867486d403 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 15 Aug 2024 11:04:51 -0400 Subject: [PATCH 595/736] lookup sample_type fix (#4304) * lookup sample_type fix * oops --- hail_search/queries/mito.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index b7ba456b05..19a52a74fe 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -329,9 +329,11 @@ def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): )[0] for project_guid, families in variant_projects.items(): - if os.path.exists(self._get_table_path(f'projects/WES/{project_guid}.ht')): + # Temporarily use try/except to determine sample_type, to be removed when lookup table contains sample_type + try: + hl.read_table(self._get_table_path(f'projects/WES/{project_guid}.ht')) sample_type = 'WES' - else: + except Exception: sample_type = 'WGS' for family_guid, value in families.items(): families[family_guid] = {sample_type: value} From 2aeffc57982d5c0320453a7495af9afb5a770965 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 11:23:28 -0400 Subject: [PATCH 596/736] add M chromosome as option for manual variants --- ui/pages/Project/components/CreateVariantButton.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/pages/Project/components/CreateVariantButton.jsx b/ui/pages/Project/components/CreateVariantButton.jsx index 
b0208c856e..bdd6a8e266 100644 --- a/ui/pages/Project/components/CreateVariantButton.jsx +++ b/ui/pages/Project/components/CreateVariantButton.jsx @@ -16,7 +16,7 @@ import { TAG_FORM_FIELD, TAG_FIELD_NAME } from '../constants' import { getTaggedVariantsByFamilyType, getProjectTagTypeOptions, getCurrentProject } from '../selectors' import SelectSavedVariantsTable, { VARIANT_POS_COLUMN, TAG_COLUMN, GENES_COLUMN } from './SelectSavedVariantsTable' -const CHROMOSOMES = [...Array(23).keys(), 'X', 'Y'].map(val => val.toString()).splice(1) +const CHROMOSOMES = [...Array(23).keys(), 'X', 'Y', 'M'].map(val => val.toString()).splice(1) const ZYGOSITY_OPTIONS = [{ value: 0, name: 'Hom Ref' }, { value: 1, name: 'Het' }, { value: 2, name: 'Hom Alt' }] const SV_FIELD_NAME = 'svName' From 4d81ace92df5944ad9e3c7395f915bd0a1571736 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 11:41:37 -0400 Subject: [PATCH 597/736] lower step size fo SV GQ --- ui/shared/components/panel/search/constants.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/search/constants.js b/ui/shared/components/panel/search/constants.js index cd3ed3cafe..a045a020bb 100644 --- a/ui/shared/components/panel/search/constants.js +++ b/ui/shared/components/panel/search/constants.js @@ -638,7 +638,7 @@ export const SV_QUALITY_FILTER_FIELDS = [ labelHelp: 'The genotype quality (GQ) represents the quality of a Structural Variant call. 
Recommended SV-GQ cutoffs for filtering: > 10.', min: 0, max: 100, - step: 10, + step: 5, }, ] From 014153dd9e32a7dbc6cd33deab921e0937d3799d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 11:58:45 -0400 Subject: [PATCH 598/736] add current saved searches fixture --- seqr/fixtures/variant_searches.json | 463 ++++++++++++++++++++++------ 1 file changed, 363 insertions(+), 100 deletions(-) diff --git a/seqr/fixtures/variant_searches.json b/seqr/fixtures/variant_searches.json index 92d66e9b25..cc8c652a69 100644 --- a/seqr/fixtures/variant_searches.json +++ b/seqr/fixtures/variant_searches.json @@ -1,49 +1,47 @@ [ { "model": "seqr.variantsearch", - "pk": 1, + "pk": 79516, "fields": { - "guid": "VS0000001_de_novo_dominant_res", - "name": "De Novo/ Dominant Restrictive", + "guid": "VS0079516_", + "created_date": "2022-02-04T20:49:42Z", + "created_by": null, + "last_modified_date": "2024-04-01T16:11:45.701Z", + "name": "De Novo/Dominant Restrictive", + "order": 1.0, "search": { - "qualityFilter": { - "vcf_filter": "pass", - "min_ab": 20, - "min_gq": 20 - }, - "pathogenicity": { - "hgmd": [ - "disease_causing" - ], - "clinvar": [ - "pathogenic", - "likely_pathogenic" - ] - }, "freqs": { "g1k": { "ac": null, - "af": 0.001 + "af": 1 }, - "gnomad_genomes": { + "exac": { "ac": null, - "af": 0.001 + "af": 1 }, - "gnomad_exomes": { + "topmed": { + "ac": null, + "af": 1 + }, + "callset": { + "ac": null, + "af": 0.01 + }, + "gnomad_svs": { "ac": null, "af": 0.001 }, - "exac": { + "sv_callset": { "ac": null, "af": 0.001 }, - "topmed": { + "gnomad_exomes": { "ac": null, "af": 0.001 }, - "callset": { + "gnomad_genomes": { "ac": null, - "af": 0.01 + "af": 0.001 } }, "annotations": { @@ -51,12 +49,6 @@ "inframe_insertion", "inframe_deletion" ], - "nonsense": [ - "stop_gained" - ], - "frameshift": [ - "frameshift_variant" - ], "missense": [ "stop_lost", "initiator_codon_variant", @@ -64,35 +56,32 @@ "protein_altering_variant", "missense_variant" ], - 
"extended_splice_site": [ - "splice_region_variant" + "nonsense": [ + "stop_gained" ], + "splice_ai": "0.2", + "frameshift": [ + "frameshift_variant" + ], + "structural": [], + "extended_splice_site": [], "essential_splice_site": [ "splice_donor_variant", "splice_acceptor_variant" + ], + "structural_consequence": [ + "LOF", + "INV_SPAN", + "COPY_GAIN" ] }, + "datasetType": "VARIANTS", "inheritance": { + "mode": "de_novo", "filter": { "A": "has_alt", "N": "ref_ref" - }, - "mode": "de_novo" - } - } - } -}, -{ - "model": "seqr.variantsearch", - "pk": 2, - "fields": { - "guid": "VS0000002_recessive_restrictiv", - "name": "Recessive Restrictive", - "search": { - "qualityFilter": { - "vcf_filter": "pass", - "min_ab": 20, - "min_gq": 20 + } }, "pathogenicity": { "hgmd": [ @@ -103,43 +92,66 @@ "likely_pathogenic" ] }, + "qualityFilter": { + "min_ab": 20, + "min_gq": 30, + "min_qs": 50, + "vcf_filter": "pass" + } + } + } +}, +{ + "model": "seqr.variantsearch", + "pk": 79525, + "fields": { + "guid": "VS0079525_", + "created_date": "2022-02-04T21:28:12Z", + "created_by": null, + "last_modified_date": "2024-05-03T18:21:08.983Z", + "name": "Recessive Restrictive", + "order": 2.0, + "search": { "freqs": { "g1k": { "ac": null, - "af": 0.01 + "af": 1 }, - "gnomad_genomes": { + "exac": { "ac": null, - "af": 0.01 + "af": 1 }, - "gnomad_exomes": { + "topmed": { + "ac": null, + "af": 1 + }, + "callset": { + "ac": null, + "af": 0.03 + }, + "gnomad_svs": { "ac": null, "af": 0.01 }, - "exac": { + "sv_callset": { "ac": null, "af": 0.01 }, - "topmed": { + "gnomad_exomes": { "ac": null, "af": 0.01 }, - "callset": { + "gnomad_genomes": { "ac": null, - "af": 0.03 + "af": 0.01 } }, "annotations": { + "other": [], "in_frame": [ "inframe_insertion", "inframe_deletion" ], - "nonsense": [ - "stop_gained" - ], - "frameshift": [ - "frameshift_variant" - ], "missense": [ "stop_lost", "initiator_codon_variant", @@ -147,35 +159,191 @@ "protein_altering_variant", "missense_variant" ], - 
"extended_splice_site": [ - "splice_region_variant" + "nonsense": [ + "stop_gained" + ], + "splice_ai": "0.2", + "frameshift": [ + "frameshift_variant" ], + "structural": [], + "synonymous": [], + "extended_splice_site": [], "essential_splice_site": [ "splice_donor_variant", "splice_acceptor_variant" + ], + "structural_consequence": [ + "LOF", + "INV_SPAN" ] }, "inheritance": { + "mode": "recessive", "filter": { "A": null, "N": null }, - "mode": "recessive" + "annotationSecondary": true + }, + "pathogenicity": { + "hgmd": [ + "disease_causing" + ], + "clinvar": [ + "pathogenic", + "likely_pathogenic" + ] + }, + "qualityFilter": { + "min_ab": 20, + "min_gq": 30, + "min_qs": 50, + "vcf_filter": "pass" + }, + "annotations_secondary": { + "in_frame": [ + "inframe_insertion", + "inframe_deletion" + ], + "missense": [ + "stop_lost", + "initiator_codon_variant", + "start_lost", + "protein_altering_variant", + "missense_variant" + ], + "nonsense": [ + "stop_gained" + ], + "frameshift": [ + "frameshift_variant" + ], + "structural": [], + "extended_splice_site": [], + "essential_splice_site": [ + "splice_donor_variant", + "splice_acceptor_variant" + ], + "structural_consequence": [ + "LOF", + "INV_SPAN" + ] } } } }, { "model": "seqr.variantsearch", - "pk": 3, + "pk": 79517, "fields": { - "guid": "VS0000003_de_novo_dominant_per", - "name": "De Novo/ Dominant Permissive", + "guid": "VS0079517_", + "created_date": "2022-02-04T20:51:58Z", + "created_by": null, + "last_modified_date": "2024-04-01T16:12:23.216Z", + "name": "De Novo/Dominant Permissive", + "order": 3.0, "search": { - "qualityFilter": { - "vcf_filter": null, - "min_ab": 0, - "min_gq": 20 + "freqs": { + "g1k": { + "ac": null, + "af": 1 + }, + "exac": { + "ac": null, + "af": 1 + }, + "topmed": { + "ac": null, + "af": 1 + }, + "callset": { + "ac": null, + "af": 0.01 + }, + "gnomad_svs": { + "ac": null, + "af": 0.001 + }, + "sv_callset": { + "ac": null, + "af": 0.001 + }, + "gnomad_exomes": { + "ac": null, + "af": 0.001 
+ }, + "gnomad_genomes": { + "ac": null, + "af": 0.001 + } + }, + "annotations": { + "other": [ + "transcript_ablation", + "transcript_amplification", + "5_prime_UTR_variant", + "3_prime_UTR_variant", + "non_coding_exon_variant", + "TFBS_ablation", + "TFBS_amplification", + "TF_binding_site_variant", + "regulatory_region_variant", + "regulatory_region_ablation", + "regulatory_region_amplification" + ], + "in_frame": [ + "inframe_insertion", + "inframe_deletion" + ], + "missense": [ + "stop_lost", + "initiator_codon_variant", + "start_lost", + "protein_altering_variant", + "missense_variant" + ], + "nonsense": [ + "stop_gained" + ], + "splice_ai": "0.1", + "frameshift": [ + "frameshift_variant" + ], + "structural": [ + "gCNV_DEL", + "gCNV_DUP" + ], + "synonymous": [ + "synonymous_variant", + "stop_retained_variant" + ], + "extended_splice_site": [ + "splice_region_variant" + ], + "essential_splice_site": [ + "splice_donor_variant", + "splice_acceptor_variant" + ], + "structural_consequence": [ + "LOF", + "COPY_GAIN", + "DUP_PARTIAL", + "MSV_EXON_OVR", + "INTRONIC", + "INV_SPAN", + "UTR", + "INTERGENIC", + "PROMOTER" + ] + }, + "datasetType": "VARIANTS", + "inheritance": { + "mode": "de_novo", + "filter": { + "A": "has_alt", + "N": "ref_ref" + } }, "pathogenicity": { "hgmd": [ @@ -187,59 +355,139 @@ "vus_or_conflicting" ] }, + "qualityFilter": { + "min_ab": 10, + "min_gq": 30, + "min_qs": 20, + "vcf_filter": null + } + } + } +}, +{ + "model": "seqr.variantsearch", + "pk": 145435, + "fields": { + "guid": "VS0145435_", + "created_date": "2023-11-06T16:31:06Z", + "created_by": null, + "last_modified_date": "2024-05-03T18:21:23.219Z", + "name": "Recessive Permissive", + "order": 4.0, + "search": { "freqs": { "g1k": { "ac": null, - "af": 0.001 + "af": 1 }, - "gnomad_genomes": { + "exac": { "ac": null, - "af": 0.001 + "af": 1 }, - "gnomad_exomes": { + "topmed": { "ac": null, - "af": 0.001 + "af": 1 }, - "exac": { + "callset": { "ac": null, - "af": 0.001 + "af": 0.03 }, - 
"topmed": { + "gnomad_svs": { "ac": null, - "af": 0.001 + "af": 0.01 }, - "callset": { + "sv_callset": { + "ac": null, + "af": 0.01 + }, + "gnomad_exomes": { + "ac": null, + "af": 0.01 + }, + "gnomad_genomes": { "ac": null, "af": 0.01 } }, "annotations": { + "other": [ + "non_coding_exon_variant" + ], "in_frame": [ "inframe_insertion", "inframe_deletion" ], - "synonymous": [ - "synonymous_variant", - "stop_retained_variant" + "missense": [ + "stop_lost", + "initiator_codon_variant", + "start_lost", + "protein_altering_variant", + "missense_variant" ], "nonsense": [ "stop_gained" ], + "splice_ai": "0.1", "frameshift": [ "frameshift_variant" ], + "structural": [ + "gCNV_DUP", + "gCNV_DEL" + ], + "synonymous": [], + "extended_splice_site": [], + "essential_splice_site": [ + "splice_donor_variant", + "splice_acceptor_variant" + ], + "structural_consequence": [ + "LOF", + "MSV_EXON_OVR", + "INTRAGENIC_EXON_DUP", + "PARTIAL_EXON_DUP" + ] + }, + "inheritance": { + "mode": "recessive", + "filter": { + "A": null, + "N": null + }, + "annotationSecondary": true + }, + "pathogenicity": { + "hgmd": [ + "disease_causing" + ], + "clinvar": [ + "pathogenic", + "likely_pathogenic", + "vus_or_conflicting" + ] + }, + "qualityFilter": { + "min_ab": 10, + "min_gq": 30, + "min_qs": 50 + }, + "annotations_secondary": { "other": [ - "5_prime_UTR_variant", - "3_prime_UTR_variant", - "TF_binding_site_variant", - "non_coding_exon_variant", - "regulatory_region_variant", "transcript_ablation", "transcript_amplification", + "5_prime_UTR_variant", + "3_prime_UTR_variant", "TFBS_ablation", "TFBS_amplification", + "TF_binding_site_variant", + "regulatory_region_variant", "regulatory_region_ablation", - "regulatory_region_amplification" + "regulatory_region_amplification", + "non_coding_transcript_exon_variant__canonical" + ], + "in_frame": [ + "inframe_insertion", + "inframe_deletion" ], "missense": [ "stop_lost", @@ -248,20 +496,35 @@ "protein_altering_variant", "missense_variant" ], + 
"nonsense": [ + "stop_gained" + ], + "frameshift": [ + "frameshift_variant" + ], + "structural": [ + "gCNV_DEL", + "gCNV_DUP" + ], + "synonymous": [ + "synonymous_variant", + "stop_retained_variant" + ], "extended_splice_site": [ "splice_region_variant" ], "essential_splice_site": [ "splice_donor_variant", "splice_acceptor_variant" + ], + "structural_consequence": [ + "LOF", + "INTRONIC", + "UTR", + "PROMOTER", + "INTRAGENIC_EXON_DUP", + "PARTIAL_EXON_DUP" ] - }, - "inheritance": { - "filter": { - "A": "has_alt", - "N": "ref_ref" - }, - "mode": "de_novo" } } } From b9a0b115803a2e85fb9f753819216c4fba1e5652 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 11:59:21 -0400 Subject: [PATCH 599/736] always update to latest searches on docker startup --- deploy/docker/seqr/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/docker/seqr/entrypoint.sh b/deploy/docker/seqr/entrypoint.sh index 20ff4286cd..bfc5596274 100755 --- a/deploy/docker/seqr/entrypoint.sh +++ b/deploy/docker/seqr/entrypoint.sh @@ -46,13 +46,13 @@ if ! 
psql --host "$POSTGRES_SERVICE_HOSTNAME" -U "$POSTGRES_USERNAME" -l | grep python -u manage.py migrate python -u manage.py migrate --database=reference_data python -u manage.py loaddata variant_tag_types - python -u manage.py loaddata variant_searches python -u manage.py update_all_reference_data --use-cached-omim else # run any pending migrations if the database already exists python -u manage.py migrate python -u manage.py migrate --database=reference_data fi +python -u manage.py loaddata variant_searches python -u manage.py check From e7452fd1b7eae75968e562ca6acdb5b3021a7429 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 12:12:57 -0400 Subject: [PATCH 600/736] fix unit tests --- .../tests/reset_cached_search_results_tests.py | 2 +- seqr/views/apis/variant_search_api_tests.py | 10 +++++----- seqr/views/utils/orm_to_json_utils_tests.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/seqr/management/tests/reset_cached_search_results_tests.py b/seqr/management/tests/reset_cached_search_results_tests.py index 509d77be23..432ab2df9a 100644 --- a/seqr/management/tests/reset_cached_search_results_tests.py +++ b/seqr/management/tests/reset_cached_search_results_tests.py @@ -15,7 +15,7 @@ class ResetCachedSearchResultsTest(TestCase): @classmethod def setUpTestData(cls): - result = VariantSearchResults.objects.create(search_hash='abc', variant_search_id=1) + result = VariantSearchResults.objects.create(search_hash='abc', variant_search_id=79516) result.families.set(Family.objects.filter(pk=1)) cls.result_guid = result.guid diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 926368947a..d0b714746d 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -132,7 +132,7 @@ EXPECTED_SEARCH_CONTEXT_RESPONSE = { 'savedSearchesByGuid': { - 'VS0000001_de_novo_dominant_res': mock.ANY, 'VS0000002_recessive_restrictiv': mock.ANY, 
'VS0000003_de_novo_dominant_per': mock.ANY, + 'VS0079516_': mock.ANY, 'VS0079525_': mock.ANY, 'VS0079517_': mock.ANY, 'VS0145435_': mock.ANY, }, 'projectsByGuid': {PROJECT_GUID: mock.ANY}, 'familiesByGuid': mock.ANY, @@ -700,7 +700,7 @@ def test_search_context(self): expected_response['projectsByGuid']['R0003_test'] = mock.ANY self.assertSetEqual(set(response_json), set(expected_response)) self.assertDictEqual(response_json, expected_response) - self.assertEqual(len(response_json['savedSearchesByGuid']), 3) + self.assertEqual(len(response_json['savedSearchesByGuid']), 4) self.assertSetEqual(set(response_json['projectsByGuid'][PROJECT_GUID].keys()), PROJECT_CONTEXT_FIELDS) self.assertSetEqual(set(response_json['projectsByGuid'][PROJECT_GUID]['datasetTypes']), {'SNV_INDEL', 'SV', 'MITO'}) self.assertSetEqual(set(response_json['projectsByGuid']['R0003_test']['datasetTypes']), {'SNV_INDEL'}) @@ -936,7 +936,7 @@ def test_saved_search(self): response = self.client.get(get_saved_search_url) self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()['savedSearchesByGuid']), 3) + self.assertEqual(len(response.json()['savedSearchesByGuid']), 4) create_saved_search_url = reverse(create_saved_search_handler) @@ -971,7 +971,7 @@ def test_saved_search(self): response = self.client.get(get_saved_search_url) self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()['savedSearchesByGuid']), 4) + self.assertEqual(len(response.json()['savedSearchesByGuid']), 5) # Test cannot save different searches with the same name body['filters'] = {'test': 'filter'} @@ -1001,7 +1001,7 @@ def test_saved_search(self): response = self.client.get(get_saved_search_url) self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()['savedSearchesByGuid']), 3) + self.assertEqual(len(response.json()['savedSearchesByGuid']), 4) global_saved_search_guid = next(iter(response.json()['savedSearchesByGuid'])) diff --git 
a/seqr/views/utils/orm_to_json_utils_tests.py b/seqr/views/utils/orm_to_json_utils_tests.py index 3b5e009db9..a9e09e6d15 100644 --- a/seqr/views/utils/orm_to_json_utils_tests.py +++ b/seqr/views/utils/orm_to_json_utils_tests.py @@ -178,7 +178,7 @@ def test_json_for_variant_note(self): self.assertSetEqual(set(json.keys()), fields) def test_json_for_saved_search(self): - searches = VariantSearch.objects.filter(id=1) + searches = VariantSearch.objects.filter(name='De Novo/Dominant Restrictive') user = User.objects.get(username='test_user') json = get_json_for_saved_searches(searches, user)[0] From cef07930824788b4f00636ecf19393877d5b00a6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 12:27:36 -0400 Subject: [PATCH 601/736] update saved search criteria --- seqr/fixtures/variant_searches.json | 64 +++++++++++------------------ 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/seqr/fixtures/variant_searches.json b/seqr/fixtures/variant_searches.json index cc8c652a69..11b05577df 100644 --- a/seqr/fixtures/variant_searches.json +++ b/seqr/fixtures/variant_searches.json @@ -11,14 +11,6 @@ "order": 1.0, "search": { "freqs": { - "g1k": { - "ac": null, - "af": 1 - }, - "exac": { - "ac": null, - "af": 1 - }, "topmed": { "ac": null, "af": 1 @@ -69,13 +61,15 @@ "splice_donor_variant", "splice_acceptor_variant" ], + "other": [ + "non_coding_transcript_exon_variant__canonical" + ], "structural_consequence": [ "LOF", - "INV_SPAN", + "INTRAGENIC_EXON_DUP", "COPY_GAIN" ] }, - "datasetType": "VARIANTS", "inheritance": { "mode": "de_novo", "filter": { @@ -96,6 +90,7 @@ "min_ab": 20, "min_gq": 30, "min_qs": 50, + "min_gq_sv": 5, "vcf_filter": "pass" } } @@ -113,14 +108,6 @@ "order": 2.0, "search": { "freqs": { - "g1k": { - "ac": null, - "af": 1 - }, - "exac": { - "ac": null, - "af": 1 - }, "topmed": { "ac": null, "af": 1 @@ -139,11 +126,13 @@ }, "gnomad_exomes": { "ac": null, - "af": 0.01 + "af": 0.01, + "hh": 5 }, "gnomad_genomes": { "ac": null, - "af": 
0.01 + "af": 0.01, + "hh": 5 } }, "annotations": { @@ -175,7 +164,7 @@ ], "structural_consequence": [ "LOF", - "INV_SPAN" + "INTRAGENIC_EXON_DUP" ] }, "inheritance": { @@ -199,6 +188,7 @@ "min_ab": 20, "min_gq": 30, "min_qs": 50, + "min_gq_sv": 5, "vcf_filter": "pass" }, "annotations_secondary": { @@ -227,7 +217,7 @@ ], "structural_consequence": [ "LOF", - "INV_SPAN" + "INTRAGENIC_EXON_DUP" ] } } @@ -245,14 +235,6 @@ "order": 3.0, "search": { "freqs": { - "g1k": { - "ac": null, - "af": 1 - }, - "exac": { - "ac": null, - "af": 1 - }, "topmed": { "ac": null, "af": 1 @@ -334,10 +316,12 @@ "INV_SPAN", "UTR", "INTERGENIC", + "INTRAGENIC_EXON_DUP", + "PARTIAL_EXON_DUP", + "BREAKEND_EXONIC", "PROMOTER" ] }, - "datasetType": "VARIANTS", "inheritance": { "mode": "de_novo", "filter": { @@ -376,14 +360,6 @@ "order": 4.0, "search": { "freqs": { - "g1k": { - "ac": null, - "af": 1 - }, - "exac": { - "ac": null, - "af": 1 - }, "topmed": { "ac": null, "af": 1 @@ -402,11 +378,13 @@ }, "gnomad_exomes": { "ac": null, - "af": 0.01 + "af": 0.01, + "hh": 5 }, "gnomad_genomes": { "ac": null, - "af": 0.01 + "af": 0.01, + "hh": 5 } }, "annotations": { @@ -445,6 +423,8 @@ "LOF", "MSV_EXON_OVR", "INTRAGENIC_EXON_DUP", + "INV_SPAN", + "BREAKEND_EXONIC", "PARTIAL_EXON_DUP" ] }, @@ -523,6 +503,8 @@ "UTR", "PROMOTER", "INTRAGENIC_EXON_DUP", + "INV_SPAN", + "BREAKEND_EXONIC", "PARTIAL_EXON_DUP" ] } From 1b0669fb97ec82459a4539218217e3adcd93f7f3 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 15 Aug 2024 12:38:23 -0400 Subject: [PATCH 602/736] use_ssd_dir (#4307) --- hail_search/queries/mito.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 19a52a74fe..cad6a0b3d7 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -331,7 +331,7 @@ def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): for project_guid, families in variant_projects.items(): # Temporarily use 
try/except to determine sample_type, to be removed when lookup table contains sample_type try: - hl.read_table(self._get_table_path(f'projects/WES/{project_guid}.ht')) + hl.read_table(self._get_table_path(f'projects/WES/{project_guid}.ht', use_ssd_dir=True)) sample_type = 'WES' except Exception: sample_type = 'WGS' From 41385c7b28acf6a0d171a91cb332ecb83592cadb Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 15 Aug 2024 13:30:40 -0400 Subject: [PATCH 603/736] Dev (#4309) * lookup sample_type fix (#4304) * lookup sample_type fix * oops * use_ssd_dir (#4307) --- hail_search/queries/mito.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index b7ba456b05..cad6a0b3d7 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -329,9 +329,11 @@ def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): )[0] for project_guid, families in variant_projects.items(): - if os.path.exists(self._get_table_path(f'projects/WES/{project_guid}.ht')): + # Temporarily use try/except to determine sample_type, to be removed when lookup table contains sample_type + try: + hl.read_table(self._get_table_path(f'projects/WES/{project_guid}.ht', use_ssd_dir=True)) sample_type = 'WES' - else: + except Exception: sample_type = 'WGS' for family_guid, value in families.items(): families[family_guid] = {sample_type: value} From 2da25df0bc7ec17c4c5391b043039563a1b0cdb4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 15 Aug 2024 16:33:29 -0400 Subject: [PATCH 604/736] include SVs in gregor report --- seqr/views/apis/report_api.py | 1 - seqr/views/utils/anvil_metadata_utils.py | 28 ++++++++++-------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 2f83827ec5..d92a1f7fd3 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -404,7 +404,6 @@ def _add_row(row, 
family_id, row_type): format_id=_format_gregor_id, get_additional_individual_fields=_get_participant_row, post_process_variant=_post_process_gregor_variant, - include_svs=False, airtable_fields=[[PARTICIPANT_ID_FIELD, 'Recontactable'], [SMID_FIELD]], include_mondo=True, proband_only_variants=True, diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index b11c7346f9..049e7a8ea2 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -162,7 +162,7 @@ def parse_anvil_metadata( get_additional_sample_fields: Callable[[Sample, dict], dict] = None, get_additional_individual_fields: Callable[[Individual, dict], dict] = None, individual_samples: dict[Individual, Sample] = None, individual_data_types: dict[str, Iterable[str]] = None, - airtable_fields: Iterable[str] = None, mme_value: Aggregate = None, include_svs: bool = True, + airtable_fields: Iterable[str] = None, mme_value: Aggregate = None, variant_json_fields: Iterable[str] = None, variant_attr_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, include_no_individual_families: bool = False, omit_airtable: bool = False, include_family_name_display: bool = False, include_family_sample_metadata: bool = False, include_discovery_sample_id: bool = False, include_mondo: bool = False, omit_parent_mnvs: bool = False, @@ -186,7 +186,7 @@ def parse_anvil_metadata( sample_ids.add(sample.sample_id) saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family( - list(family_data_by_id.keys()), bool(mme_value), include_svs, variant_json_fields, variant_attr_fields, + list(family_data_by_id.keys()), bool(mme_value), variant_json_fields, variant_attr_fields, ) condition_map = _get_condition_map(family_data_by_id.values()) @@ -322,7 +322,7 @@ def _get_discovery_notes(variant, gene_variants, omit_parent_mnvs): def _get_parsed_saved_discovery_variants_by_family( - families: Iterable[Family], 
include_metadata: bool, include_svs: dict, variant_json_fields: list[str], + families: Iterable[Family], include_metadata: bool, variant_json_fields: list[str], variant_attr_fields: list[str], ): tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY) @@ -330,14 +330,11 @@ def _get_parsed_saved_discovery_variants_by_family( annotations = dict( tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True), partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')), + validated_sv_name=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Validated Name')), ) - if include_svs: - annotations['validated_sv_name'] = ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Validated Name')) - variant_attr_fields = ['validated_sv_name'] + (variant_attr_fields or []) project_saved_variants = SavedVariant.objects.filter( varianttag__variant_tag_type__in=tag_types, family__id__in=families, - **({} if include_svs else {'alt__isnull': False}), ).order_by('created_date').distinct().annotate(**annotations) variants = [] @@ -356,10 +353,6 @@ def _get_parsed_saved_discovery_variants_by_family( phenotype_contribution = 'Uncertain' partial_hpo_terms = '' - variant_fields = ['genotypes'] - if include_svs: - variant_fields += ['svType', 'svName', 'end'] - parsed_variant = { 'chrom': 'MT' if chrom == 'M' else chrom, 'pos': pos, @@ -370,18 +363,19 @@ def _get_parsed_saved_discovery_variants_by_family( 'phenotype_contribution': phenotype_contribution, 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, - **{k: variant_json.get(k) for k in variant_fields + (variant_json_fields or [])}, + **{k: variant_json.get(k) 
for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, 'ClinGen_allele_ID': variant_json.get('CAID'), - **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt'] + (variant_attr_fields or [])}, + **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'validated_sv_name'] + (variant_attr_fields or [])}, } if include_metadata: parsed_variant.update({ 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), }) - if include_svs: - parsed_variant.update({ - 'sv_name': _get_sv_name(parsed_variant), - }) + + # TODO clean up SV name logic + parsed_variant.update({ + 'sv_name': _get_sv_name(parsed_variant), + }) variants.append(parsed_variant) genes_by_id = get_genes(gene_ids) From 4e79f2c7b159475fe4b561be1e5b85548aefdf8d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 11:16:23 -0400 Subject: [PATCH 605/736] update gregor SV columns --- seqr/views/apis/report_api.py | 19 ++++++++------- seqr/views/utils/anvil_metadata_utils.py | 31 ++++++++---------------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index d92a1f7fd3..3509ff2ea2 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -122,7 +122,7 @@ def _add_row(row, family_id, row_type): if row_type == DISCOVERY_ROW_TYPE: missing_gene_rows = [ '{chrom}-{pos}-{ref}-{alt}'.format(**discovery_row) for discovery_row in row - if not (discovery_row.get(GENE_COLUMN) or discovery_row.get('svType'))] + if not (discovery_row.get(GENE_COLUMN) or discovery_row.get('sv_type'))] if missing_gene_rows: raise ErrorsWarningsException( [f'Discovery variant(s) {", ".join(missing_gene_rows)} in family {family_id} have no associated gene']) @@ -135,7 +135,6 @@ def _add_row(row, family_id, row_type): 'Gene_Class': 'gene_known_for_phenotype', 'inheritance_description': 'variant_inheritance', 'variant_genome_build': 'variant_reference_assembly', - 'sv_type': 'svType', 
'discovery_notes': 'notes', }.items()}, **discovery_row, @@ -390,10 +389,7 @@ def _add_row(row, family_id, row_type): participant_rows.append({**row, 'consent_code': consent_code}) smids_by_airtable_record_id.update(row[SMID_FIELD] or {}) elif row_type == DISCOVERY_ROW_TYPE and row: - for variant in row: - genetic_findings_rows.append({ - **variant, 'variant_type': 'SNV/INDEL', - }) + genetic_findings_rows.extend(row) parse_anvil_metadata( projects, @@ -582,9 +578,14 @@ def _get_phenotype_row(feature): def _post_process_gregor_variant(row, gene_variants): - return {'linked_variant': next( - v['genetic_findings_id'] for v in gene_variants if v['genetic_findings_id'] != row['genetic_findings_id'] - ) if len(gene_variants) > 1 else None} + sv_name = row.pop('sv_name') + return { + 'hgvs': row.pop('validated_name') or sv_name, + 'linked_variant': next( + v['genetic_findings_id'] for v in gene_variants if v['genetic_findings_id'] != row['genetic_findings_id'] + ) if len(gene_variants) > 1 else None, + 'variant_type': 'SNV/INDEL' if row['alt'] else 'SV', + } def _get_airtable_row(data_type, airtable_metadata): diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 049e7a8ea2..b9e7b483e9 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -263,17 +263,7 @@ def parse_anvil_metadata( def _get_nested_variant_name(v): - return _get_sv_name(v, pop_sv_name=False) or f"{v['chrom']}-{v['pos']}-{v['ref']}-{v['alt']}" - - -def _get_sv_name(variant_json, pop_sv_name=True): - validated_sv_name = variant_json.pop('validated_sv_name', None) - sv_name = variant_json.pop('svName', None) if pop_sv_name else variant_json.get('svName') - if validated_sv_name: - return validated_sv_name[0] - if variant_json.get('svType'): - return sv_name or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json) - return None + return v['sv_name'] or f"{v['chrom']}-{v['pos']}-{v['ref']}-{v['alt']}" def 
_get_loaded_before_date_project_individual_samples(projects, max_loaded_date): @@ -312,7 +302,7 @@ def _get_discovery_notes(variant, gene_variants, omit_parent_mnvs): parent_mnv = next((v for v in gene_variants if len(v['individual_genotype']) == 1), gene_variants[0]) if parent_mnv['genetic_findings_id'] == variant['genetic_findings_id'] and omit_parent_mnvs: return None - variant_type = 'complex structural' if parent_mnv.get('svType') else 'multinucleotide' + variant_type = 'complex structural' if parent_mnv.get('sv_type') else 'multinucleotide' parent_name = _get_nested_variant_name(parent_mnv) parent_details = [parent_mnv[key] for key in ['hgvsc', 'hgvsp'] if parent_mnv.get(key)] parent = f'{parent_name} ({", ".join(parent_details)})' if parent_details else parent_name @@ -330,7 +320,7 @@ def _get_parsed_saved_discovery_variants_by_family( annotations = dict( tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True), partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')), - validated_sv_name=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Validated Name')), + validated_name=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Validated Name')), ) project_saved_variants = SavedVariant.objects.filter( @@ -362,20 +352,18 @@ def _get_parsed_saved_discovery_variants_by_family( 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', 'phenotype_contribution': phenotype_contribution, 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), + 'sv_name': (variant_json.get('svName') or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json)) if variant_json.get('svType') else None, **{k: _get_transcript_field(k, config, main_transcript) for k, config in 
TRANSCRIPT_FIELDS.items()}, - **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, - 'ClinGen_allele_ID': variant_json.get('CAID'), - **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'validated_sv_name'] + (variant_attr_fields or [])}, + **{k: variant_json.get(k) for k in ['genotypes'] + (variant_json_fields or [])}, + **{k: variant_json.get(field) for k, field in [ + ('ClinGen_allele_ID', 'CAID'), ('sv_type', 'svType'), ('chrom_end', 'endChrom'), ('pos_end', 'end'), + ]}, + **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'validated_name'] + (variant_attr_fields or [])}, } if include_metadata: parsed_variant.update({ 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), }) - - # TODO clean up SV name logic - parsed_variant.update({ - 'sv_name': _get_sv_name(parsed_variant), - }) variants.append(parsed_variant) genes_by_id = get_genes(gene_ids) @@ -489,6 +477,7 @@ def _get_genetic_findings_rows(rows: list[dict], individual: Individual, family_ HET: 'Heteroplasmy', HOM_ALT: 'Homoplasmy', }[zygosity], + 'copy_number': individual_genotype.get('cn'), 'allele_balance_or_heteroplasmy_percentage': heteroplasmy, 'variant_inheritance': _get_variant_inheritance(individual, genotypes), **row, From d2fa8f124653851631fcfbba2da96c0d19d994bf Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 11:28:54 -0400 Subject: [PATCH 606/736] update manual SV sv_type values --- seqr/views/utils/anvil_metadata_utils.py | 3 ++- .../components/CreateVariantButton.jsx | 14 ++--------- ui/shared/utils/constants.js | 25 +++++++++++-------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index b9e7b483e9..6d5252bcd0 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -467,6 +467,7 @@ def _get_genetic_findings_rows(rows: list[dict], 
individual: Individual, family_ genotypes = row['genotypes'] individual_genotype = genotypes.get(individual.guid) or {} zygosity = _get_genotype_zygosity(individual_genotype) + copy_number = individual_genotype.get('cn') or -1 if zygosity: heteroplasmy = individual_genotype.get('hl') findings_id = f'{participant_id}_{row["chrom"]}_{row["pos"]}' @@ -477,7 +478,7 @@ def _get_genetic_findings_rows(rows: list[dict], individual: Individual, family_ HET: 'Heteroplasmy', HOM_ALT: 'Homoplasmy', }[zygosity], - 'copy_number': individual_genotype.get('cn'), + 'copy_number': copy_number if copy_number >= 0 else None, 'allele_balance_or_heteroplasmy_percentage': heteroplasmy, 'variant_inheritance': _get_variant_inheritance(individual, genotypes), **row, diff --git a/ui/pages/Project/components/CreateVariantButton.jsx b/ui/pages/Project/components/CreateVariantButton.jsx index b0208c856e..8c20ca3d33 100644 --- a/ui/pages/Project/components/CreateVariantButton.jsx +++ b/ui/pages/Project/components/CreateVariantButton.jsx @@ -10,7 +10,7 @@ import UpdateButton from 'shared/components/buttons/UpdateButton' import { Select, IntegerInput, LargeMultiselect } from 'shared/components/form/Inputs' import { validators, configuredField } from 'shared/components/form/FormHelpers' import { AwesomeBarFormInput } from 'shared/components/page/AwesomeBar' -import { GENOME_VERSION_FIELD } from 'shared/utils/constants' +import { GENOME_VERSION_FIELD, SV_TYPES } from 'shared/utils/constants' import { TAG_FORM_FIELD, TAG_FIELD_NAME } from '../constants' import { getTaggedVariantsByFamilyType, getProjectTagTypeOptions, getCurrentProject } from '../selectors' @@ -131,16 +131,6 @@ const SAVED_VARIANT_FIELD = { control: SavedVariantField, } -const SV_TYPE_OPTIONS = [ - { value: 'DEL', text: 'Deletion' }, - { value: 'DUP', text: 'Duplication' }, - { value: 'Multiallelic CNV' }, - { value: 'Insertion' }, - { value: 'Inversion' }, - { value: 'Complex SVs' }, - { value: 'Other' }, -] - const 
validateHasTranscriptId = (value, allValues, props, name) => { if (!value) { return undefined @@ -193,7 +183,7 @@ const SV_FIELDS = [ name: 'svType', label: 'SV Type', component: Select, - options: SV_TYPE_OPTIONS, + options: SV_TYPES, validate: validators.required, width: 8, }, diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index d0eec5f8c0..289e33ece6 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -728,17 +728,7 @@ export const VEP_GROUP_SV = 'structural' export const VEP_GROUP_SV_CONSEQUENCES = 'structural_consequence' export const VEP_GROUP_SV_NEW = 'new_structural_variants' -const VEP_SV_TYPES = [ - { - description: 'A deletion called from exome data', - text: 'Exome Deletion', - value: 'gCNV_DEL', - }, - { - description: 'A duplication called from exome data', - text: 'Exome Duplication', - value: 'gCNV_DUP', - }, +export const SV_TYPES = [ { description: 'A deletion called from genome data', text: 'Deletion', @@ -780,6 +770,19 @@ const VEP_SV_TYPES = [ value: 'BND', }, ] +const VEP_SV_TYPES = [ + { + description: 'A deletion called from exome data', + text: 'Exome Deletion', + value: 'gCNV_DEL', + }, + { + description: 'A duplication called from exome data', + text: 'Exome Duplication', + value: 'gCNV_DUP', + }, + ...SV_TYPES, +] export const EXTENDED_INTRONIC_DESCRIPTION = "A variant which falls in the first 9 bases of the 5' end of intron or the within the last 9 bases of the 3' end of intron" From 5e9c2f89a85f7901d4ab9be91661ed1a5f52d731 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 13:00:57 -0400 Subject: [PATCH 607/736] use condition gene for Svs --- seqr/views/utils/anvil_metadata_utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 6d5252bcd0..67f8d197f9 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ 
b/seqr/views/utils/anvil_metadata_utils.py @@ -471,6 +471,8 @@ def _get_genetic_findings_rows(rows: list[dict], individual: Individual, family_ if zygosity: heteroplasmy = individual_genotype.get('hl') findings_id = f'{participant_id}_{row["chrom"]}_{row["pos"]}' + if row['sv_type']: + findings_id += f'_{row["sv_type"]}' parsed_row = { 'genetic_findings_id': findings_id, 'participant_id': participant_id, @@ -587,7 +589,7 @@ def _get_condition_map(families): omim_conditions_by_id_gene = defaultdict(lambda: defaultdict(list)) for omim in Omim.objects.filter(phenotype_mim_number__in=mim_numbers).values( 'phenotype_mim_number', 'phenotype_description', 'phenotype_inheritance', 'chrom', 'start', 'end', - 'gene__gene_id', + 'gene__gene_id', 'gene__gene_symbol', ): omim_conditions_by_id_gene[omim['phenotype_mim_number']][omim['gene__gene_id']].append(omim) @@ -628,6 +630,15 @@ def _update_conditions(family_subject_row, variants, omim_conditions, mondo_cond variant_conditions += omim_conditions[mim_number][gene_id] if set_conditions_for_variants: + if v['sv_type'] and mim_numbers and not variant_conditions: + # For SVs report the gene linked to the condition instead of the annotated gene if conflicting + possible_gene_conditions = [ + conditions for mim_number in mim_numbers + for gene_id, conditions in omim_conditions[mim_number].items() if gene_id and conditions + ] + if len(possible_gene_conditions) == 1: + variant_conditions = possible_gene_conditions[0] + v[GENE_COLUMN] = variant_conditions[0]['gene__gene_symbol'] conditions = _format_omim_conditions(variant_conditions) if variant_conditions else mondo_condition v.update(conditions) else: From fbac1759c7350fb4cf8f40f2bc02654b38ff208a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 13:30:05 -0400 Subject: [PATCH 608/736] gregor syntax fixes --- seqr/views/apis/report_api_tests.py | 4 ++-- seqr/views/utils/anvil_metadata_utils.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff 
--git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 4652f435fe..f0c6550635 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -437,8 +437,8 @@ {'column': 'notes'}, {'column': 'sv_type'}, {'column': 'chrom_end'}, - {'column': 'pos_end'}, - {'column': 'copy_number'}, + {'column': 'pos_end', 'data_type': 'integer'}, + {'column': 'copy_number', 'data_type': 'integer'}, {'column': 'hgvs'}, {'column': 'gene_disease_validity'}, ] diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 67f8d197f9..635903c6e5 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -336,6 +336,7 @@ def _get_parsed_saved_discovery_variants_by_family( main_transcript = _get_variant_main_transcript(variant) gene_id = main_transcript.get('geneId') gene_ids.add(gene_id) + sv_type = variant_json.get('svType') partial_hpo_terms = variant.partial_hpo_terms[0] if variant.partial_hpo_terms else '' phenotype_contribution = 'Partial' if partial_hpo_terms else 'Full' @@ -352,13 +353,14 @@ def _get_parsed_saved_discovery_variants_by_family( 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', 'phenotype_contribution': phenotype_contribution, 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), - 'sv_name': (variant_json.get('svName') or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json)) if variant_json.get('svType') else None, + 'sv_type': sv_type, + 'sv_name': (variant_json.get('svName') or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json)) if sv_type else None, + 'validated_name': variant.validated_name[0] if variant.validated_name else None, **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in ['genotypes'] + (variant_json_fields or [])}, - **{k: variant_json.get(field) 
for k, field in [ - ('ClinGen_allele_ID', 'CAID'), ('sv_type', 'svType'), ('chrom_end', 'endChrom'), ('pos_end', 'end'), - ]}, - **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'validated_name'] + (variant_attr_fields or [])}, + **{k: variant_json.get(field) if sv_type else None for k, field in [('chrom_end', 'endChrom'), ('pos_end', 'end')]}, + 'ClinGen_allele_ID': variant_json.get('CAID'), + **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt'] + (variant_attr_fields or [])}, } if include_metadata: parsed_variant.update({ From 32a0ec2e43313eefe47cf8b755617635d8f57bcd Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 14:05:03 -0400 Subject: [PATCH 609/736] fix gregor tests --- seqr/fixtures/1kg_project.json | 1 + seqr/fixtures/reference_data.json | 2 +- seqr/utils/gene_utils_tests.py | 12 ++++++++---- .../search/elasticsearch/es_utils_tests.py | 4 ++-- seqr/utils/search/hail_search_utils_tests.py | 2 +- seqr/views/apis/family_api_tests.py | 2 +- seqr/views/apis/gene_api_tests.py | 13 +++++++++---- seqr/views/apis/report_api_tests.py | 19 ++++++++----------- 8 files changed, 31 insertions(+), 24 deletions(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 3c46e3ad04..b52625b92a 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -156,6 +156,7 @@ "coded_phenotype": "microcephaly, seizures", "mondo_id": "MONDO:0044976", "post_discovery_mondo_id": "MONDO:0044970", + "post_discovery_omim_numbers": [615123], "case_review_notes": "
internal notes 2
\n
 
", "case_review_summary": "
internal case review summary 2
\n
 
" } diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json index 6fd43023c2..5008543959 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -1064,7 +1064,7 @@ "model": "reference_data.omim", "pk": 1, "fields": { - "gene": 1, + "gene": 6, "mim_number": 147571, "gene_description": "ISG15 ubiquitin-like modifier", "comments": "", diff --git a/seqr/utils/gene_utils_tests.py b/seqr/utils/gene_utils_tests.py index ad5944ea90..5d9b1ca9ce 100644 --- a/seqr/utils/gene_utils_tests.py +++ b/seqr/utils/gene_utils_tests.py @@ -16,7 +16,7 @@ def test_get_gene(self): self.assertSetEqual(set(json.keys()), GENE_DETAIL_FIELDS) def test_get_genes(self): - gene_ids = {GENE_ID, 'ENSG00000227232'} + gene_ids = {GENE_ID, 'ENSG00000227232', 'ENSG00000240361'} user = User.objects.get(pk=1) json = get_genes(gene_ids) @@ -41,12 +41,16 @@ def test_get_genes(self): self.assertSetEqual( set(gene['constraints'].keys()), {'misZ', 'misZRank', 'pli', 'pliRank', 'louef', 'louefRank', 'totalGenes'}) self.assertSetEqual(set(gene['cnSensitivity'].keys()), {'phi', 'pts'}) - self.assertSetEqual( - set(gene['omimPhenotypes'][0].keys()), - {'mimNumber', 'phenotypeMimNumber', 'phenotypeDescription', 'phenotypeInheritance', 'chrom', 'start', 'end'}) + self.assertListEqual(gene['omimPhenotypes'], []) self.assertSetEqual(set(gene['genCc'].keys()), {'hgncId', 'classifications'}) self.assertSetEqual(set(gene['clinGen'].keys()), {'haploinsufficiency', 'triplosensitivity', 'href'}) + omim_gene = json['ENSG00000240361'] + self.assertSetEqual( + set(omim_gene['omimPhenotypes'][0].keys()), + {'mimNumber', 'phenotypeMimNumber', 'phenotypeDescription', 'phenotypeInheritance', 'chrom', 'start', + 'end'}) + sparse_gene = json['ENSG00000227232'] self.assertIsNone(sparse_gene['primateAi']) self.assertDictEqual(sparse_gene['constraints'], {}) diff --git a/seqr/utils/search/elasticsearch/es_utils_tests.py b/seqr/utils/search/elasticsearch/es_utils_tests.py 
index 0775a6ab45..16f4f5a8c4 100644 --- a/seqr/utils/search/elasticsearch/es_utils_tests.py +++ b/seqr/utils/search/elasticsearch/es_utils_tests.py @@ -3440,7 +3440,7 @@ def test_sort(self): 'type': 'number', 'script': { 'params': { - 'omim_gene_ids': ['ENSG00000223972', 'ENSG00000135953'] + 'omim_gene_ids': ['ENSG00000240361', 'ENSG00000135953'] }, 'source': mock.ANY, } @@ -3450,7 +3450,7 @@ def test_sort(self): 'type': 'number', 'script': { 'params': { - 'omim_gene_ids': ['ENSG00000223972', 'ENSG00000135953'] + 'omim_gene_ids': ['ENSG00000240361', 'ENSG00000135953'] }, 'source': mock.ANY, } diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 3818309757..f10c3f98da 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -92,7 +92,7 @@ def test_query_variants(self): query_variants(self.results_model, user=self.user, sort='in_omim') self._test_expected_search_call( num_results=2, dataset_type='SNV_INDEL', sample_data={'SNV_INDEL': EXPECTED_SAMPLE_DATA['SNV_INDEL']}, - sort='in_omim', sort_metadata=['ENSG00000223972', 'ENSG00000135953'], + sort='in_omim', sort_metadata=['ENSG00000240361', 'ENSG00000135953'], **VARIANT_ID_SEARCH, ) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index ee7324a83e..30c3f59811 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -132,7 +132,7 @@ def test_family_page_data(self): 'chrom': '1', 'start': 11869, 'end': 14409, - 'geneSymbol': 'DDX11L1', + 'geneSymbol': 'OR4G11P', 'mimNumber': 147571, 'phenotypeMimNumber': 616126, 'phenotypeDescription': 'Immunodeficiency 38', diff --git a/seqr/views/apis/gene_api_tests.py b/seqr/views/apis/gene_api_tests.py index 3ddc5db398..4dad494b75 100644 --- a/seqr/views/apis/gene_api_tests.py +++ b/seqr/views/apis/gene_api_tests.py @@ -27,11 +27,11 @@ def test_genes_info(self): url = reverse(genes_info) 
self.check_require_login(url) - response = self.client.get('{}?geneIds={},ENSG00000269981,foo'.format(url, GENE_ID)) + response = self.client.get('{}?geneIds={},ENSG00000269981,ENSG00000240361,foo'.format(url, GENE_ID)) self.assertEqual(response.status_code, 200) genes = response.json()['genesById'] - self.assertSetEqual(set(genes.keys()), {GENE_ID, 'ENSG00000269981'}) + self.assertSetEqual(set(genes.keys()), {GENE_ID, 'ENSG00000269981', 'ENSG00000240361'}) self.assertSetEqual(set(genes[GENE_ID].keys()), GENE_DETAIL_FIELDS) self.assertDictEqual(genes[GENE_ID], { 'chromGrch37': '1', @@ -54,14 +54,19 @@ def test_genes_info(self): 'geneNames': '', 'geneSymbol': 'DDX11L1', 'mgiMarkerId': None, - 'mimNumber': 147571, + 'mimNumber': None, 'notes': [], - 'omimPhenotypes': [{'mimNumber': 147571, 'phenotypeDescription': 'Immunodeficiency 38', 'phenotypeInheritance': 'Autosomal recessive', 'phenotypeMimNumber': 616126, 'chrom': '1', 'start': 11869, 'end': 14409}], + 'omimPhenotypes': [], 'primateAi': {'percentile25': 0.587214291096, 'percentile75': 0.821286439896}, 'sHet': {'postMean': 0.90576}, 'startGrch37': 11869, 'startGrch38': 11869, }) + self.assertEqual(genes['ENSG00000240361']['mimNumber'], 147571) + self.assertListEqual( + genes['ENSG00000240361']['omimPhenotypes'], + [{'mimNumber': 147571, 'phenotypeDescription': 'Immunodeficiency 38', 'phenotypeInheritance': 'Autosomal recessive', 'phenotypeMimNumber': 616126, 'chrom': '1', 'start': 11869, 'end': 14409}], + ) def test_create_update_and_delete_gene_note(self): diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index f0c6550635..7b44c4a0f3 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -414,8 +414,8 @@ {'column': 'variant_reference_assembly', 'required': True, 'data_type': 'enumeration', 'enumerations': ['GRCh37', 'GRCh38']}, {'column': 'chrom', 'required': True}, {'column': 'pos', 'required': True, 'data_type': 'integer'}, - 
{'column': 'ref','required': True}, - {'column': 'alt', 'required': True}, + {'column': 'ref','required': 'CONDITIONAL (variant_type = SNV/INDEL, variant_type = RE)'}, + {'column': 'alt', 'required': 'CONDITIONAL (variant_type = SNV/INDEL, variant_type = RE)'}, {'column': 'ClinGen_allele_ID'}, {'column': 'gene_of_interest', 'required': True}, {'column': 'transcript'}, @@ -617,12 +617,13 @@ ], [ 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown', - 'Broad_NA20889_1_249045487', '', 'Candidate', 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', + 'Broad_NA20889_1_249045487_DEL', '', 'Candidate', 'Immunodeficiency 38', 'OMIM:616126', 'Autosomal recessive', 'Partial', 'HP:0000501|HP:0000365', '', 'SR-ES', '', '', '', '', '', '', '', ], [ - 'Broad_NA20889_1_249045487', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '249045487', 'A', 'G', '', + 'Broad_NA20889_1_249045487_DEL', 'Broad_NA20889', '', 'SV', 'GRCh37', '1', '249045487', '', '', '', 'OR4G11P', '', '', '', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_248367227', '', 'Candidate', - 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', + 'Immunodeficiency 38', 'OMIM:616126', 'Autosomal recessive', 'Full', '', '', 'SR-ES', '', 'DEL', '', + '249045898', '1', 'DEL:chr1:249045123-249045456', '', ], ] @@ -936,13 +937,9 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat project.consent_code = 'H' project.save() - # Currently not reporting SV discoveries, so modify fixture data to report comp het pair - # Remove this once we are reporting SVs + # For SV variant, test reports in gene associated with OMIM condition even if not annotated variant = SavedVariant.objects.get(id=7) - variant.ref = 'A' - variant.alt = 'G' - 
variant.saved_variant_json['genotypes']['I000017_na20889']['numAlt'] = 1 - variant.saved_variant_json['transcripts'] = {'ENSG00000240361': []} + variant.saved_variant_json['transcripts'] = {'ENSG00000135953': []} variant.save() responses.calls.reset() From 50b64a488e8d7dbed22695933078dc1ddfe88e9c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 14:16:57 -0400 Subject: [PATCH 610/736] update import gregor tests --- seqr/views/apis/individual_api_tests.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py index 72f72df621..b9142b17ba 100644 --- a/seqr/views/apis/individual_api_tests.py +++ b/seqr/views/apis/individual_api_tests.py @@ -996,6 +996,11 @@ def test_import_gregor_metadata(self, mock_subprocess): genetic_findings_table = deepcopy(GENETIC_FINDINGS_TABLE) genetic_findings_table[2] = genetic_findings_table[2][:11] + genetic_findings_table[4][11:14] + \ genetic_findings_table[2][14:] + genetic_findings_table.append([ + 'Broad_NA20889_1_249045487', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '249045487', 'A', 'G', '', + 'OR4G11P', '', '', '', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_248367227', '', 'Candidate', + 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', + ]) self._set_metadata_file_iter(mock_subprocess, genetic_findings_table) url = reverse(import_gregor_metadata, args=[PM_REQUIRED_PROJECT_GUID]) @@ -1177,9 +1182,9 @@ def test_import_gregor_metadata(self, mock_subprocess): self.assertIsNone(comp_het_tag.metadata) self.assertDictEqual(json.loads(next(t for t in existing_variant_tags if t != comp_het_tag).metadata), { 'gene_known_for_phenotype': 'Candidate', - 'condition_id': 'MONDO:0008788', - 'known_condition_name': 'IRIDA syndrome', - 'condition_inheritance': 'Autosomal dominant', + 'condition_id': 'OMIM:616126', + 'known_condition_name': 'Immunodeficiency 
38', + 'condition_inheritance': 'Autosomal recessive', }) self.assertDictEqual(json.loads(next(t for t in new_variant_tags if t != comp_het_tag).metadata), { 'gene_known_for_phenotype': 'Candidate', From 907cc21b07129450991cca3d45448e32f1d33f40 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 14:18:58 -0400 Subject: [PATCH 611/736] fix validated name family metadata --- seqr/views/apis/report_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 3509ff2ea2..8839c48f59 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -849,7 +849,7 @@ def _add_row(row, family_id, row_type): family = families_by_id[family_id] if 'inheritance_models' not in family: family.update({'genes': set(), 'inheritance_models': set()}) - family['genes'].update({v.get(GENE_COLUMN) or v.get('sv_name') or v.get('gene_id') or '' for v in row}) + family['genes'].update({v.get(GENE_COLUMN) or v.get('validated_name') or v.get('sv_name') or v.get('gene_id') or '' for v in row}) family['inheritance_models'].update({v['variant_inheritance'] for v in row}) parse_anvil_metadata( From 4e6e421f83d6ba0da150665679740e8e9d329e8c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 14:25:26 -0400 Subject: [PATCH 612/736] update variant metadata tests --- seqr/views/apis/report_api_tests.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 7b44c4a0f3..9fa87646cd 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -498,9 +498,11 @@ 'additional_family_members_with_variant': '', 'allele_balance_or_heteroplasmy_percentage': None, 'analysisStatus': 'Q', + 'chrom_end': None, 'clinvar': None, 'condition_id': None, - 'end': None, + 'copy_number': None, + 'pos_end': None, 'hgvsc': '', 'hgvsp': '', 'method_of_discovery': 
'SR-ES', @@ -508,9 +510,10 @@ 'phenotype_contribution': 'Full', 'partial_contribution_explained': '', 'seqr_chosen_consequence': None, - 'svType': None, + 'sv_type': None, 'sv_name': None, 'transcript': None, + 'validated_name': None, } PARTICIPANT_TABLE = [ @@ -1323,7 +1326,6 @@ def test_variant_metadata(self): 'condition_id': 'MONDO:0044970', 'condition_inheritance': 'Unknown', 'displayName': '2', - 'end': 1912634, 'familyGuid': 'F000002_2', 'family_id': '2', 'gene_of_interest': 'OR4G11P', @@ -1350,7 +1352,7 @@ def test_variant_metadata(self): self.assertEqual(response.status_code, 200) response_json = response.json() self.assertListEqual(list(response_json.keys()), ['rows']) - row_ids += ['NA20889_1_248367227', 'NA20889_1_249045487'] + row_ids += ['NA20889_1_248367227', 'NA20889_1_249045487_DEL'] self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids) self.assertDictEqual(response_json['rows'][1], expected_row) self.assertDictEqual(response_json['rows'][2], expected_mnv) @@ -1361,7 +1363,8 @@ def test_variant_metadata(self): 'chrom': '1', 'ClinGen_allele_ID': 'CA1501729', 'clinvar': {'alleleId': None, 'clinicalSignificance': '', 'goldStars': None, 'variationId': None}, - 'condition_id': 'MONDO:0008788', + 'condition_id': 'OMIM:616126', + 'condition_inheritance': 'Autosomal recessive', 'displayName': '12', 'familyGuid': 'F000012_12', 'family_id': '12', @@ -1369,6 +1372,7 @@ def test_variant_metadata(self): 'gene_id': 'ENSG00000240361', 'gene_known_for_phenotype': 'Candidate', 'genetic_findings_id': 'NA20889_1_248367227', + 'known_condition_name': 'Immunodeficiency 38', 'hgvsc': 'c.3955G>A', 'hgvsp': 'c.1586-17C>G', 'participant_id': 'NA20889', @@ -1392,21 +1396,23 @@ def test_variant_metadata(self): 'condition_id': 'OMIM:616126', 'condition_inheritance': 'Autosomal recessive', 'known_condition_name': 'Immunodeficiency 38', + 'copy_number': 1, 'displayName': '12', - 'end': 249045898, + 'pos_end': 249045898, 'familyGuid': 
'F000012_12', 'family_id': '12', 'gene_of_interest': None, 'gene_id': None, 'gene_known_for_phenotype': 'Candidate', - 'genetic_findings_id': 'NA20889_1_249045487', + 'genetic_findings_id': 'NA20889_1_249045487_DEL', 'participant_id': 'NA20889', 'pos': 249045487, 'projectGuid': 'R0003_test', 'internal_project_id': 'Test Reprocessed Project', 'ref': None, - 'svType': 'DEL', - 'sv_name': 'DEL:chr1:249045123-249045456', + 'sv_type': 'DEL', + 'sv_name': 'DEL:chr1:249045487-249045898', + 'validated_name': 'DEL:chr1:249045123-249045456', 'tags': ['Tier 1 - Novel gene and phenotype'], 'variant_inheritance': 'unknown', 'variant_reference_assembly': 'GRCh37', From 7fa1724467d04a94b2b5ad1819f92b92fbb50431 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 14:38:20 -0400 Subject: [PATCH 613/736] update report ui --- ui/pages/SummaryData/components/IndividualMetadata.jsx | 4 ++-- ui/shared/utils/constants.js | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ui/pages/SummaryData/components/IndividualMetadata.jsx b/ui/pages/SummaryData/components/IndividualMetadata.jsx index 91c7610390..431c8c6d9b 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.jsx +++ b/ui/pages/SummaryData/components/IndividualMetadata.jsx @@ -65,10 +65,10 @@ const getColumns = (data) => { const hasAirtable = data && data[0] && data[0][AIRTABLE_DBGAP_SUBMISSION_FIELD] return [...CORE_COLUMNS, ...(hasAirtable ? AIRTABLE_COLUMNS : [])].concat( ...[...Array(maxSavedVariants).keys()].map(i => VARIANT_METADATA_COLUMNS.map( - ({ name, format, fieldName, secondaryExportColumn, ...col }) => ({ + ({ name, format, secondaryExportColumn, ...col }) => ({ name: `${name}-${i + 1}`, secondaryExportColumn: secondaryExportColumn && `${secondaryExportColumn}-${i + 1}`, - format: format ? row => format({ [fieldName]: row[`${fieldName}-${i + 1}`] }) : null, + format: format ? 
row => format({ [name]: row[`${name}-${i + 1}`] }) : null, ...col, }), )), diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 289e33ece6..621472eba7 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1888,7 +1888,8 @@ export const VARIANT_METADATA_COLUMNS = [ { name: 'variant_reference_assembly' }, { name: 'chrom' }, { name: 'pos' }, - { name: 'end' }, + { name: 'chrom_end' }, + { name: 'pos_end' }, { name: 'ref' }, { name: 'alt' }, { name: 'gene_of_interest', secondaryExportColumn: 'gene_id' }, @@ -1897,8 +1898,10 @@ export const VARIANT_METADATA_COLUMNS = [ { name: 'hgvsc' }, { name: 'hgvsp' }, { name: 'zygosity' }, + { name: 'copy_number' }, { name: 'sv_name' }, - { name: 'sv_type', fieldName: 'svType', format: ({ svType }) => SVTYPE_LOOKUP[svType] || svType }, + { name: 'validated_name' }, + { name: 'sv_type', format: ({ sv_type }) => SVTYPE_LOOKUP[sv_type] || sv_type }, // eslint-disable-line camelcase { name: 'variant_inheritance' }, { name: 'gene_known_for_phenotype' }, { name: 'phenotype_contribution' }, From ff39f4dbb30a88abdc740b66a056d16e78fb1e58 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 14:44:25 -0400 Subject: [PATCH 614/736] fix individual metadata tests --- seqr/views/apis/summary_data_api_tests.py | 25 +++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 56e4bb5b75..200c25eab6 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -43,17 +43,21 @@ "variant_inheritance-1": "unknown", "variant_inheritance-2": "unknown", 'genetic_findings_id-1': 'NA20889_1_248367227', - 'genetic_findings_id-2': 'NA20889_1_249045487', + 'genetic_findings_id-2': 'NA20889_1_249045487_DEL', "hgvsc-1": "c.3955G>A", "date_data_generation": "2017-02-05", "zygosity-1": "Heterozygous", "zygosity-2": "Heterozygous", + 
'copy_number-1': None, + 'copy_number-2': 1, "ref-1": "TC", - "svType-2": "DEL", - "sv_name-2": "DEL:chr1:249045123-249045456", + "sv_type-2": "DEL", + "sv_name-2": "DEL:chr1:249045487-249045898", + "validated_name-2": "DEL:chr1:249045123-249045456", "chrom-2": "1", + 'chrom_end-2': None, "pos-2": 249045487, - 'end-2': 249045898, + 'pos_end-2': 249045898, "maternal_id": "", "paternal_id": "", "maternal_guid": "", @@ -97,9 +101,11 @@ 'seqr_chosen_consequence-2': None, 'gene_of_interest-2': None, 'gene_id-2': None, - 'svType-1': None, + 'sv_type-1': None, 'sv_name-1': None, - 'end-1': None, + 'validated_name-1': None, + 'chrom_end-1': None, + 'pos_end-1': None, 'notes-1': '', 'notes-2': '', 'phenotype_contribution-1': 'Partial', @@ -155,8 +161,10 @@ 'phenotype_contribution-1': 'Full', 'partial_contribution_explained-1': '', 'pos-1': 248367227, - 'end-1': None, + 'chrom_end-1': None, + 'pos_end-1': None, 'ref-1': 'TC', + 'copy_number-1': None, 'zygosity-1': 'Heterozygous', 'variant_reference_assembly-1': 'GRCh38', 'gene_of_interest-1': None, @@ -165,8 +173,9 @@ 'hgvsp-1': '', 'notes-1': '', 'seqr_chosen_consequence-1': None, - 'svType-1': None, + 'sv_type-1': None, 'sv_name-1': None, + 'validated_name-1': None, 'transcript-1': None, 'analysis_groups': '', 'ClinGen_allele_ID-1': 'CA1501729', From bcaa4d94cf84c8aaccbb2764b9eee5919c31c7e1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 16 Aug 2024 15:03:23 -0400 Subject: [PATCH 615/736] fix js tests --- .../components/IndividualMetadata.test.js | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/ui/pages/SummaryData/components/IndividualMetadata.test.js b/ui/pages/SummaryData/components/IndividualMetadata.test.js index 44c5ad173d..f6abaaf0ac 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.test.js +++ b/ui/pages/SummaryData/components/IndividualMetadata.test.js @@ -20,25 +20,30 @@ const DATA = [ 'variant_inheritance-2': 'unknown', hpo_absent: '', 
'genetic_findings_id-1': 'NA20889_1_248367227', - 'genetic_findings_id-2': 'NA20889_1_249045487', + 'genetic_findings_id-2': 'NA20889_1_249045487_DEL', 'hgvsc-1': 'c.3955G>A', date_data_generation: '2017-02-05', + 'copy_number-1': null, + 'copy_number-2': 1, 'zygosity-1': 'Heterozygous', 'zygosity-2': 'Heterozygous', 'ref-1': 'TC', - 'svType-2': 'Deletion', + 'sv_type-2': 'Deletion', 'sv_name-2': 'DEL:chr12:49045487-49045898', + 'validated_name-2': 'DEL:chr12:49045123-49045456', 'chrom-2': '12', + 'chrom_end-2': null, 'pos-2': '49045487', - 'end-2': '49045898', + 'pos_end-2': '49045898', maternal_id: '', paternal_id: '', maternal_guid: '', paternal_guid: '', 'hgvsp-1': 'c.1586-17C>G', internal_project_id: 'Test Reprocessed Project', + 'chrom_end-1': null, 'pos-1': 248367227, - 'end-1': null, + 'pos_end-1': null, data_type: 'WES', familyGuid: 'F000012_12', family_history: 'Yes', @@ -85,21 +90,21 @@ test('IndividualMetadata render and export', () => { 'hpo_present', 'hpo_absent', 'MME', 'pmid_id', 'condition_id', 'known_condition_name', 'condition_inheritance', 'disorders', 'phenotype_description', 'analysis_groups', 'analysis_status', 'solve_status', 'data_type', 'date_data_generation', 'filter_flags', 'consanguinity', 'family_history', 'genetic_findings_id-1', 'variant_reference_assembly-1', - 'chrom-1', 'pos-1', 'end-1', 'ref-1', 'alt-1', 'gene_of_interest-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1', - 'hgvsc-1', 'hgvsp-1', 'zygosity-1', 'sv_name-1', 'sv_type-1', 'variant_inheritance-1', 'gene_known_for_phenotype-1', + 'chrom-1', 'pos-1', 'chrom_end-1', 'pos_end-1', 'ref-1', 'alt-1', 'gene_of_interest-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1', + 'hgvsc-1', 'hgvsp-1', 'zygosity-1', 'copy_number-1', 'sv_name-1', 'validated_name-1', 'sv_type-1', 'variant_inheritance-1', 'gene_known_for_phenotype-1', 'phenotype_contribution-1', 'partial_contribution_explained-1', 'notes-1', 'ClinGen_allele_ID-1', - 'genetic_findings_id-2', 
'variant_reference_assembly-2', 'chrom-2', 'pos-2', 'end-2', + 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2', 'chrom_end-2', 'pos_end-2', 'ref-2', 'alt-2', 'gene_of_interest-2', 'gene_id-2', 'seqr_chosen_consequence-2', 'transcript-2', 'hgvsc-2', 'hgvsp-2', - 'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', + 'zygosity-2', 'copy_number-2', 'sv_name-2', 'validated_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', 'phenotype_contribution-2', 'partial_contribution_explained-2', 'notes-2', 'ClinGen_allele_ID-2']) expect(exportConfig.processRow(DATA[0])).toEqual([ 'Test Reprocessed Project', 'R0003_test', '12', 'F000012_12', 'NA20889', 'I000017_na20889', '', '', '', '', 'Self', 'Female', 'Ashkenazi Jewish', 'Affected', 'HP:0011675 (Arrhythmia)|HP:0001509 ()', '', 'Yes', null, 'OMIM:616126', 'Immunodeficiency 38', 'Autosomal recessive', null, null, undefined, 'Waiting for data', 'Tier 1', - 'WES', '2017-02-05', '', undefined, 'Yes', 'NA20889_1_248367227', undefined, '1', 248367227, null, 'TC', 'T', - 'OR4G11P', 'ENSG00000240361', 'intron_variant', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', undefined, undefined, - 'unknown', 'Candidate', 'Partial', 'HP:0000501|HP:0000365', undefined, 'CA1501729', 'NA20889_1_249045487', undefined, - '12', '49045487', '49045898', undefined, undefined, undefined, undefined, undefined, - undefined, undefined, undefined, 'Heterozygous', 'DEL:chr12:49045487-49045898', 'Deletion', + 'WES', '2017-02-05', '', undefined, 'Yes', 'NA20889_1_248367227', undefined, '1', 248367227, null, null, 'TC', 'T', + 'OR4G11P', 'ENSG00000240361', 'intron_variant', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', null, undefined, undefined, undefined, + 'unknown', 'Candidate', 'Partial', 'HP:0000501|HP:0000365', undefined, 'CA1501729', 'NA20889_1_249045487_DEL', undefined, + '12', '49045487', null, '49045898', undefined, 
undefined, undefined, undefined, undefined, + undefined, undefined, undefined, 'Heterozygous', 1, 'DEL:chr12:49045487-49045898', 'DEL:chr12:49045123-49045456', 'Deletion', 'unknown', 'Candidate', 'Full', '', undefined, null]) }) From fef241a0a96a8d091c93105e5c0ba6affb9938bf Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 19 Aug 2024 12:48:05 -0400 Subject: [PATCH 616/736] correctly report hemizygous --- seqr/views/utils/anvil_metadata_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 635903c6e5..6381b0f5b8 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -286,13 +286,14 @@ def _get_sorted_search_samples(projects): HET = 'Heterozygous' HOM_ALT = 'Homozygous' +HEMI = 'Hemizygous' -def _get_genotype_zygosity(genotype): +def _get_genotype_zygosity(genotype, individual=None, variant=None): num_alt = genotype.get('numAlt') cn = genotype.get('cn') if num_alt == 2 or cn == 0 or (cn != None and cn > 3): - return HOM_ALT + return HEMI if (variant or {}).get('chrom') == 'X' and individual.sex == Individual.SEX_MALE else HOM_ALT if num_alt == 1 or cn == 1 or cn == 3: return HET return None @@ -468,7 +469,7 @@ def _get_genetic_findings_rows(rows: list[dict], individual: Individual, family_ for row in (rows or []): genotypes = row['genotypes'] individual_genotype = genotypes.get(individual.guid) or {} - zygosity = _get_genotype_zygosity(individual_genotype) + zygosity = _get_genotype_zygosity(individual_genotype, individual, row) copy_number = individual_genotype.get('cn') or -1 if zygosity: heteroplasmy = individual_genotype.get('hl') From a930887cee7feddfa72eb74853fce46d5c9c62f7 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Mon, 19 Aug 2024 16:54:04 +0000 Subject: [PATCH 617/736] fix: requirements.txt to reduce vulnerabilities The following vulnerabilities are fixed by pinning transitive 
dependencies: - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-7435780 - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-7436273 - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-7436514 - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-7436646 - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-7642790 - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-7642791 - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-7642813 - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-7642814 - https://snyk.io/vuln/SNYK-PYTHON-REQUESTS-6928867 - https://snyk.io/vuln/SNYK-PYTHON-SQLPARSE-6615674 - https://snyk.io/vuln/SNYK-PYTHON-ZIPP-7430899 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index a3175ce89a..191441bf4e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -182,3 +182,4 @@ urllib3==1.26.19 # requests whitenoise==6.3.0 # via -r requirements.in +zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability From 5b06e2fb433d4a99a6f21ab82095d53b145c749e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 19 Aug 2024 13:41:55 -0400 Subject: [PATCH 618/736] update v3 pedigree path --- seqr/views/apis/anvil_workspace_api_tests.py | 2 +- seqr/views/apis/data_manager_api_tests.py | 4 ++-- seqr/views/utils/airflow_utils.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index ce044695b2..9f7048e1ae 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -748,7 +748,7 @@ def _assert_valid_operation(self, project, test_add_data=True): ) self.mock_gsutil.assert_called_with( - f'rsync -r {gs_path}', f'gs://seqr-loading-temp/v03/{genome_version}/WES/SNV_INDEL/pedigrees/', self.manager_user, + f'rsync -r {gs_path}', f'gs://seqr-loading-temp/v3.1/{genome_version}/SNV_INDEL/pedigrees/WES/', self.manager_user, ) self.assert_airflow_calls(additional_tasks_check=test_add_data) diff --git 
a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 888adcdbf0..68fbcf59d2 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1662,7 +1662,7 @@ def test_load_data(self, mock_temp_dir, mock_open): self.assertRegex(error, '400 Client Error: Bad Request') dag_json = dag_json.replace('mito_callset.mt', 'sv_callset.vcf').replace( - 'WGS', 'WES').replace('MITO', 'GCNV').replace('v01', 'v03') + 'WGS', 'WES').replace('MITO', 'GCNV').replace('v01', 'v3.1') error_message = f"""ERROR triggering internal WES SV loading: {errors[0]} DAG LOADING_PIPELINE should be triggered with following: @@ -1761,7 +1761,7 @@ def _has_expected_gs_calls(self, mock_open, dataset_type, sample_type='WGS', has ) for project in self.PROJECTS ] + [ mock.call( - f'gsutil rsync -r gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project}/ gs://seqr-loading-temp/v03/GRCh38/{sample_type}/{dataset_type}/pedigrees/', + f'gsutil rsync -r gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project}/ gs://seqr-loading-temp/v3.1/GRCh38/{dataset_type}/pedigrees/{sample_type}/', stdout=-1, stderr=-2, shell=True, # nosec ) for project in self.PROJECTS ], any_order=True) diff --git a/seqr/views/utils/airflow_utils.py b/seqr/views/utils/airflow_utils.py index 0be413da2a..c8a84de65a 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -19,7 +19,7 @@ DAG_NAME = 'LOADING_PIPELINE' AIRFLOW_AUTH_SCOPE = "https://www.googleapis.com/auth/cloud-platform" SEQR_V2_DATASETS_GS_PATH = 'gs://seqr-datasets/v02' -SEQR_V3_PEDIGREE_GS_PATH = 'gs://seqr-loading-temp/v03' +SEQR_V3_PEDIGREE_GS_PATH = 'gs://seqr-loading-temp/v3.1' class DagRunningException(Exception): @@ -161,7 +161,7 @@ def _get_dag_project_gs_path(project: str, genome_version: str, sample_type: str def _get_gs_pedigree_path(genome_version: str, sample_type: str, dataset_type: 
str): - return f'{SEQR_V3_PEDIGREE_GS_PATH}/{GENOME_VERSION_LOOKUP[genome_version]}/{sample_type}/{dataset_type}/pedigrees/' + return f'{SEQR_V3_PEDIGREE_GS_PATH}/{GENOME_VERSION_LOOKUP[genome_version]}/{dataset_type}/pedigrees/{sample_type}/' def _wait_for_dag_variable_update(projects): From 8274b9eca471d5e0d901b4f5e7064702576c18c7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 19 Aug 2024 13:59:43 -0400 Subject: [PATCH 619/736] use helper method for clustering --- hail_search/queries/base.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index c511eac29f..5ba5b52a3e 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -647,18 +647,7 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs): intervals = [[f'chr{interval[0]}', *interval[1:]] for interval in (intervals or [])] if len(intervals) > MAX_GENE_INTERVALS and len(intervals) == len(gene_ids or []): - intervals = sorted(intervals) - distance = 100000 - while len(intervals) > MAX_GENE_INTERVALS: - merged_intervals = [intervals[0]] - for chrom, start, end in intervals[1:]: - prev_chrom, prev_start, prev_end = merged_intervals[-1] - if chrom == prev_chrom and start - prev_end < distance: - merged_intervals[-1] = (chrom, prev_start, max(prev_end, end)) - else: - merged_intervals.append((chrom, start, end)) - intervals = merged_intervals - distance += 100000 + intervals = self.cluster_intervals(sorted(intervals)) parsed_intervals = [ hl.eval(hl.locus_interval(*interval, reference_genome=self.GENOME_VERSION, invalid_missing=True)) @@ -677,6 +666,21 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs): return parsed_intervals + @classmethod + def cluster_intervals(cls, intervals, distance=100000, max_intervals=MAX_GENE_INTERVALS): + if len(intervals) <= max_intervals: + return intervals + + merged_intervals = [intervals[0]] + for chrom, start, end in 
intervals[1:]: + prev_chrom, prev_start, prev_end = merged_intervals[-1] + if chrom == prev_chrom and start - prev_end < distance: + merged_intervals[-1] = (chrom, prev_start, max(prev_end, end)) + else: + merged_intervals.append((chrom, start, end)) + + return cls.cluster_intervals(merged_intervals, distance=distance+100000, max_intervals=max_intervals) + def _should_add_chr_prefix(self): return self.GENOME_VERSION == GENOME_VERSION_GRCh38 From ecf97112b3b69b0c2b0ac75c762432c73eef5a45 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 19 Aug 2024 14:15:20 -0400 Subject: [PATCH 620/736] test clustering --- hail_search/queries/base.py | 4 ++-- hail_search/test_search.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 5ba5b52a3e..5f3a2c195e 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -675,9 +675,9 @@ def cluster_intervals(cls, intervals, distance=100000, max_intervals=MAX_GENE_IN for chrom, start, end in intervals[1:]: prev_chrom, prev_start, prev_end = merged_intervals[-1] if chrom == prev_chrom and start - prev_end < distance: - merged_intervals[-1] = (chrom, prev_start, max(prev_end, end)) + merged_intervals[-1] = [chrom, prev_start, max(prev_end, end)] else: - merged_intervals.append((chrom, start, end)) + merged_intervals.append([chrom, start, end]) return cls.cluster_intervals(merged_intervals, distance=distance+100000, max_intervals=max_intervals) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index cabff967a9..8d8c720605 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -12,6 +12,7 @@ FAMILY_2_MITO_SAMPLE_DATA, FAMILY_2_ALL_SAMPLE_DATA, MITO_VARIANT1, MITO_VARIANT2, MITO_VARIANT3, \ EXPECTED_SAMPLE_DATA_WITH_SEX, SV_WGS_SAMPLE_DATA_WITH_SEX, VARIANT_LOOKUP_VARIANT from hail_search.web_app import init_web_app, sync_to_async_hail_query +from hail_search.queries.base import 
BaseHailTableQuery PROJECT_2_VARIANT = { 'variantId': '1-10146-ACC-A', @@ -581,6 +582,26 @@ async def test_location_search(self): gene_ids=['ENSG00000171621'], ) + async def test_cluster_intervals(self): + intervals = [ + ['1', 11785723, 11806455], ['1', 91500851, 91525764], ['2', 1234, 5678], ['2', 12345, 67890], + ['7', 1, 11100], ['7', 202020, 20202020], + ] + + self.assertListEqual(BaseHailTableQuery.cluster_intervals(intervals, max_intervals=5), [ + ['1', 11785723, 11806455], ['1', 91500851, 91525764], ['2', 1234, 67890], + ['7', 1, 11100], ['7', 202020, 20202020], + ]) + + self.assertListEqual(BaseHailTableQuery.cluster_intervals(intervals, max_intervals=4), [ + ['1', 11785723, 11806455], ['1', 91500851, 91525764], ['2', 1234, 67890], ['7', 1, 20202020], + ]) + + self.assertListEqual(BaseHailTableQuery.cluster_intervals(intervals, max_intervals=3), [ + ['1', 11785723, 91525764], ['2', 1234, 67890], ['7', 1, 20202020], + ]) + + async def test_variant_id_search(self): await self._assert_expected_search([VARIANT2], omit_data_type='SV_WES', **RSID_SEARCH) From faff59d6075cd0235fdce1c449e25e6e4e587f4a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 19 Aug 2024 15:21:59 -0400 Subject: [PATCH 621/736] edit igv button --- .../components/FamilyTable/IndividualRow.jsx | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index bf30caffb5..990967d63c 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -29,7 +29,7 @@ import { import { snakecaseToTitlecase } from 'shared/utils/stringUtils' import { updateIndividual } from 'redux/rootReducer' -import { getSamplesByGuid, getMmeSubmissionsByGuid } from 'redux/selectors' +import { getSamplesByGuid, getMmeSubmissionsByGuid, getIGVSamplesByFamilySampleIndividual } from 'redux/selectors' 
import { HPO_FORM_FIELDS } from '../HpoTerms' import { CASE_REVIEW_STATUS_MORE_INFO_NEEDED, CASE_REVIEW_STATUS_OPTIONS, CASE_REVIEW_TABLE_NAME, INDIVIDUAL_DETAIL_FIELDS, @@ -500,6 +500,28 @@ const EDIT_INDIVIDUAL_FIELDS = [INDIVIDUAL_FIELD_SEX, INDIVIDUAL_FIELD_AFFECTED] { ...field, component: connect(mapParentOptionsStateToProps)(Select), inline: true, width: 8 } ))) +const EDIT_IGV_FIELDS = [ + { name: 'filePath', label: 'File' }, +] + +const EditIndividualButton = ({ project, displayName, fieldName, ...props }) => ( + +) + +EditIndividualButton.propTypes = { + project: PropTypes.object.isRequired, + displayName: PropTypes.string, + fieldName: PropTypes.string, +} + class IndividualRow extends React.PureComponent { static propTypes = { @@ -551,17 +573,21 @@ class IndividualRow extends React.PureComponent { {`ADDED ${new Date(createdDate).toLocaleDateString().toUpperCase()}`}
- +
From b319ade59e40b777b3845358c64dfa450884d940 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 19 Aug 2024 15:44:46 -0400 Subject: [PATCH 622/736] correct form interactions --- seqr/views/apis/igv_api.py | 1 + .../components/FamilyTable/IndividualRow.jsx | 21 ++++++++++++++----- ui/pages/Project/reducers.js | 15 +++++++++---- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index e74e26856a..531ebc7408 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -150,6 +150,7 @@ def _get_valid_matched_individuals(individual_dataset_mapping): @pm_or_data_manager_required +# TODO allow access def update_individual_igv_sample(request, individual_guid): individual = Individual.objects.get(guid=individual_guid) project = individual.family.project diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index 990967d63c..33b41d6fe5 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -8,6 +8,7 @@ import { Label, Popup, Form, Input, Loader } from 'semantic-ui-react' import orderBy from 'lodash/orderBy' import { SearchInput, YearSelector, RadioButtonGroup, ButtonRadioGroup, Select } from 'shared/components/form/Inputs' +import { validators } from 'shared/components/form/FormHelpers' import PedigreeIcon from 'shared/components/icons/PedigreeIcon' import Modal from 'shared/components/modal/Modal' import { AwesomeBarFormInput } from 'shared/components/page/AwesomeBar' @@ -35,7 +36,7 @@ import { CASE_REVIEW_STATUS_MORE_INFO_NEEDED, CASE_REVIEW_STATUS_OPTIONS, CASE_REVIEW_TABLE_NAME, INDIVIDUAL_DETAIL_FIELDS, ONSET_AGE_OPTIONS, INHERITANCE_MODE_OPTIONS, INHERITANCE_MODE_LOOKUP, AR_FIELDS, } from '../../constants' -import { updateIndividuals } from '../../reducers' +import { updateIndividuals, updateIndividualIGV } from '../../reducers' 
import { getCurrentProject, getParentOptionsByIndividual } from '../../selectors' import CaseReviewStatusDropdown from './CaseReviewStatusDropdown' @@ -500,8 +501,9 @@ const EDIT_INDIVIDUAL_FIELDS = [INDIVIDUAL_FIELD_SEX, INDIVIDUAL_FIELD_AFFECTED] { ...field, component: connect(mapParentOptionsStateToProps)(Select), inline: true, width: 8 } ))) +// TODO dropdown with valid options const EDIT_IGV_FIELDS = [ - { name: 'filePath', label: 'File' }, + { name: 'filePath', label: 'IGV File Path', validate: validators.required }, ] const EditIndividualButton = ({ project, displayName, fieldName, ...props }) => ( @@ -529,7 +531,9 @@ class IndividualRow extends React.PureComponent { individual: PropTypes.object.isRequired, mmeSubmission: PropTypes.object, samplesByGuid: PropTypes.object.isRequired, + alignmentSample: PropTypes.object, dispatchUpdateIndividual: PropTypes.func, + dispatchUpdateIndividualIGV: PropTypes.func, updateIndividualPedigree: PropTypes.func, tableName: PropTypes.string, } @@ -552,7 +556,10 @@ class IndividualRow extends React.PureComponent { } render() { - const { project, individual, mmeSubmission, samplesByGuid, tableName, updateIndividualPedigree } = this.props + const { + project, individual, mmeSubmission, samplesByGuid, tableName, updateIndividualPedigree, alignmentSample, + dispatchUpdateIndividualIGV, + } = this.props const { displayName, sex, affected, createdDate, sampleGuids } = individual let loadedSamples = sampleGuids.map( @@ -584,11 +591,11 @@ class IndividualRow extends React.PureComponent { />
) @@ -623,10 +630,14 @@ const mapStateToProps = (state, ownProps) => ({ project: getCurrentProject(state), samplesByGuid: getSamplesByGuid(state), mmeSubmission: getMmeSubmissionsByGuid(state)[ownProps.individual.mmeSubmissionGuid], + alignmentSample: ( + getIGVSamplesByFamilySampleIndividual(state)[ownProps.individual.familyGuid]?.alignment || {} + )[ownProps.individual.individualGuid], }) const mapDispatchToProps = { dispatchUpdateIndividual: updateIndividual, + dispatchUpdateIndividualIGV: values => updateIndividualIGV(values), updateIndividualPedigree: values => updateIndividuals({ individuals: [values], delete: values.delete }), } diff --git a/ui/pages/Project/reducers.js b/ui/pages/Project/reducers.js index 4bba0918ca..d77a5a923c 100644 --- a/ui/pages/Project/reducers.js +++ b/ui/pages/Project/reducers.js @@ -198,15 +198,22 @@ export const addVariantsDataset = values => (dispatch, getState) => new HttpRequ }, ).post(values) +export const updateIndividualIGV = (values, onError) => dispatch => ( + new HttpRequestHelper( + `/api/individual/${values.individualGuid}/update_igv_sample`, + responseJson => dispatch({ type: RECEIVE_DATA, updatesById: responseJson }), + onError, + ).post(values) +) + export const addIGVDataset = ({ mappingFile, ...values }) => (dispatch) => { const errors = [] return Promise.all(mappingFile.updates.map( - ({ individualGuid, individualId, ...update }) => new HttpRequestHelper( - `/api/individual/${individualGuid}/update_igv_sample`, - responseJson => dispatch({ type: RECEIVE_DATA, updatesById: responseJson }), + ({ individualGuid, individualId, ...update }) => updateIndividualIGV( + { individualGuid, ...update, ...values }, e => errors.push(`Error updating ${individualId}: ${e.body && e.body.error ? 
e.body.error : e.message}`), - ).post({ ...update, ...values }), + )(dispatch), )).then(() => { if (errors.length) { const err = new Error() From b4eeef3f5e152749c3b1e0ebe59d54bc6687921f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 19 Aug 2024 16:11:30 -0400 Subject: [PATCH 623/736] grant access to external anvil projects --- seqr/views/apis/igv_api.py | 20 ++++++++++++-------- seqr/views/utils/permissions_utils.py | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 531ebc7408..c53354a447 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -3,6 +3,7 @@ import re import requests +from django.core.exceptions import PermissionDenied from django.http import StreamingHttpResponse, HttpResponse from seqr.models import Individual, IgvSample @@ -12,8 +13,9 @@ from seqr.views.utils.json_to_orm_utils import get_or_create_model_from_json from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.orm_to_json_utils import get_json_for_sample -from seqr.views.utils.permissions_utils import get_project_and_check_permissions, check_project_permissions, \ - login_and_policies_required, pm_or_data_manager_required, get_project_guids_user_can_view +from seqr.views.utils.permissions_utils import get_project_and_check_permissions, external_anvil_project_can_edit, \ + login_and_policies_required, pm_or_data_manager_required, get_project_guids_user_can_view, user_is_data_manager, \ + user_is_pm GS_STORAGE_ACCESS_CACHE_KEY = 'gs_storage_access_cache_entry' GS_STORAGE_URL = 'https://storage.googleapis.com' @@ -149,12 +151,14 @@ def _get_valid_matched_individuals(individual_dataset_mapping): GCNV_FILE_EXTENSIONS = tuple(ext for ext, sample_type in SAMPLE_TYPE_MAP if sample_type == IgvSample.SAMPLE_TYPE_GCNV) -@pm_or_data_manager_required -# TODO allow access +@login_and_policies_required def update_individual_igv_sample(request, individual_guid): 
individual = Individual.objects.get(guid=individual_guid) project = individual.family.project - check_project_permissions(project, request.user, can_edit=True) + user = request.user + + if not (user_is_pm(user) or user_is_data_manager(user) or external_anvil_project_can_edit(project, user)): + raise PermissionDenied(f'{user} does not have sufficient permissions for {project}') request_json = json.loads(request.body) @@ -167,9 +171,9 @@ def update_individual_igv_sample(request, individual_guid): if not sample_type: raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format( file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) - if not does_file_exist(file_path, user=request.user): + if not does_file_exist(file_path, user=user): raise Exception('Error accessing "{}"'.format(file_path)) - if request_json.get('indexFilePath') and not does_file_exist(request_json['indexFilePath'], user=request.user): + if request_json.get('indexFilePath') and not does_file_exist(request_json['indexFilePath'], user=user): raise Exception('Error accessing "{}"'.format(request_json['indexFilePath'])) sample, created = get_or_create_model_from_json( @@ -177,7 +181,7 @@ def update_individual_igv_sample(request, individual_guid): update_json={ 'file_path': file_path, **{field: request_json.get(field) for field in ['sampleId', 'indexFilePath']} - }, user=request.user) + }, user=user) response = { 'igvSamplesByGuid': { diff --git a/seqr/views/utils/permissions_utils.py b/seqr/views/utils/permissions_utils.py index b440915052..e0bf9b8cbb 100644 --- a/seqr/views/utils/permissions_utils.py +++ b/seqr/views/utils/permissions_utils.py @@ -123,7 +123,7 @@ def get_project_and_check_permissions(project_guid, user, **kwargs): return _get_project_and_check_permissions(project_guid, user, check_project_permissions, **kwargs) def get_project_and_check_pm_permissions(project_guid, user, override_permission_func=None): - return 
_get_project_and_check_permissions(project_guid, user, _check_project_pm_permission, + return _get_project_and_check_permissions(project_guid, user, check_project_pm_permission, override_permission_func=override_permission_func) def _get_project_and_check_permissions(project_guid, user, _check_permission_func, **kwargs): @@ -131,7 +131,7 @@ def _get_project_and_check_permissions(project_guid, user, _check_permission_fun _check_permission_func(project, user, **kwargs) return project -def _check_project_pm_permission(project, user, override_permission_func=None, **kwargs): +def check_project_pm_permission(project, user, override_permission_func=None, **kwargs): if user_is_pm(user) or (project.has_case_review and has_project_permissions(project, user, can_edit=True)): return From 4e7daaac4e946b1b3572589aa720de550d4e0adc Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 11:33:42 -0400 Subject: [PATCH 624/736] update unit tests --- seqr/views/apis/igv_api_tests.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/igv_api_tests.py b/seqr/views/apis/igv_api_tests.py index 2080f655cb..d01138c303 100644 --- a/seqr/views/apis/igv_api_tests.py +++ b/seqr/views/apis/igv_api_tests.py @@ -8,14 +8,14 @@ from seqr.views.apis.igv_api import fetch_igv_track, receive_igv_table_handler, update_individual_igv_sample, \ igv_genomes_proxy, receive_bulk_igv_table_handler from seqr.views.apis.igv_api import GS_STORAGE_ACCESS_CACHE_KEY -from seqr.views.utils.test_utils import AuthenticationTestCase +from seqr.views.utils.test_utils import AnvilAuthenticationTestCase STREAMING_READS_CONTENT = [b'CRAM\x03\x83', b'\\\t\xfb\xa3\xf7%\x01', b'[\xfc\xc9\t\xae'] PROJECT_GUID = 'R0001_1kg' @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') -class IgvAPITest(AuthenticationTestCase): +class IgvAPITest(AnvilAuthenticationTestCase): fixtures = ['users', '1kg_project'] @responses.activate @@ -292,6 +292,20 
@@ def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): })) self.assertEqual(response.status_code, 200) + # Test External AnVIL projects + ext_anvil_edit_url = reverse(update_individual_igv_sample, args=['I000019_na21987']) + self.login_collaborator() + response = self.client.post(ext_anvil_edit_url, content_type='application/json', data=json.dumps({ + 'filePath': '/readviz/NA21987.cram', + })) + self.assertEqual(response.status_code, 403) + + self.login_manager() + response = self.client.post(ext_anvil_edit_url, content_type='application/json', data=json.dumps({ + 'filePath': '/readviz/NA21987.cram', + })) + self.assertEqual(response.status_code, 200) + @responses.activate def test_igv_genomes_proxy(self): url_path = 'igv.org.genomes/foo?query=true' From e66040b4db17f7f717dbc6d412bb28b62fa0bf91 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 12:01:28 -0400 Subject: [PATCH 625/736] firat pass laodable vcf select --- .../components/FamilyTable/IndividualRow.jsx | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index 33b41d6fe5..2fd420bb70 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -9,6 +9,7 @@ import orderBy from 'lodash/orderBy' import { SearchInput, YearSelector, RadioButtonGroup, ButtonRadioGroup, Select } from 'shared/components/form/Inputs' import { validators } from 'shared/components/form/FormHelpers' +import LoadOptionsSelect from 'shared/components/form/LoadOptionsSelect' import PedigreeIcon from 'shared/components/icons/PedigreeIcon' import Modal from 'shared/components/modal/Modal' import { AwesomeBarFormInput } from 'shared/components/page/AwesomeBar' @@ -501,9 +502,21 @@ const EDIT_INDIVIDUAL_FIELDS = [INDIVIDUAL_FIELD_SEX, INDIVIDUAL_FIELD_AFFECTED] { ...field, 
component: connect(mapParentOptionsStateToProps)(Select), inline: true, width: 8 } ))) -// TODO dropdown with valid options +const mapIgvOptionsStateToProps = state => ({ + url: `/api/project/${getCurrentProject(state).projectGuid}/get_igv_options`, +}) + const EDIT_IGV_FIELDS = [ - { name: 'filePath', label: 'IGV File Path', validate: validators.required }, + { + name: 'filePath', + label: 'IGV File Path', + component: connect(mapIgvOptionsStateToProps)(LoadOptionsSelect), + optionsResponseKey: 'groups', + errorHeader: 'Unable to Load IGV Files', + validationErrorHeader: 'No User Groups Available', + validationErrorMessage: 'Contact your system administrator to have them configure user groups', + validate: validators.required, + }, ] const EditIndividualButton = ({ project, displayName, fieldName, ...props }) => ( From df7ed2eb8902b5be7347866b805d1bb73c75d548 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 12:32:14 -0400 Subject: [PATCH 626/736] load igv files from workspace --- seqr/models.py | 6 +++++ seqr/urls.py | 3 ++- seqr/views/apis/anvil_workspace_api.py | 23 +++++++++++++++---- seqr/views/apis/igv_api.py | 17 +++----------- .../components/FamilyTable/IndividualRow.jsx | 16 ++++++++----- 5 files changed, 40 insertions(+), 25 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index 852a83e9ba..e6675b9949 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -780,6 +780,12 @@ class IgvSample(ModelWithGUID): (SAMPLE_TYPE_JUNCTION, 'RNAseq Junction'), (SAMPLE_TYPE_GCNV, 'gCNV'), ) + SAMPLE_TYPE_FILE_EXTENSIONS = { + SAMPLE_TYPE_ALIGNMENT: ('bam', 'cram'), + SAMPLE_TYPE_COVERAGE: ('bigWig',), + SAMPLE_TYPE_JUNCTION: ('junctions.bed.gz',), + SAMPLE_TYPE_GCNV: ('bed.gz',), + } individual = models.ForeignKey('Individual', on_delete=models.PROTECT) sample_type = models.CharField(max_length=15, choices=SAMPLE_TYPE_CHOICES) diff --git a/seqr/urls.py b/seqr/urls.py index d49755881b..a56da19889 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -145,7 
+145,7 @@ project_samples, project_notifications, mark_read_project_notifications, subscribe_project_notifications from seqr.views.apis.project_categories_api import update_project_categories_handler from seqr.views.apis.anvil_workspace_api import anvil_workspace_page, create_project_from_workspace, \ - grant_workspace_access, validate_anvil_vcf, add_workspace_data, get_anvil_vcf_list + grant_workspace_access, validate_anvil_vcf, add_workspace_data, get_anvil_vcf_list, get_anvil_igv_options from matchmaker.views import external_api from seqr.views.utils.file_utils import save_temp_file from seqr.views.apis.feature_updates_api import get_feature_updates @@ -351,6 +351,7 @@ 'create_project_from_workspace/(?P[^/]+)/(?P[^/]+)/validate_vcf': validate_anvil_vcf, 'create_project_from_workspace/(?P[^/]+)/(?P[^/]+)/submit': create_project_from_workspace, 'create_project_from_workspace/(?P[^/]+)/(?P[^/]+)/get_vcf_list': get_anvil_vcf_list, + 'anvil_workspace/(?P[^/]+)/(?P[^/]+)/get_igv_options': get_anvil_igv_options, 'feature_updates': get_feature_updates, diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py index 5cf6af9036..7d5caf9b39 100644 --- a/seqr/views/apis/anvil_workspace_api.py +++ b/seqr/views/apis/anvil_workspace_api.py @@ -109,17 +109,32 @@ def grant_workspace_access(request, namespace, name): return create_json_response({'success': True}) -@anvil_workspace_access_required(meta_fields=['workspace.bucketName']) -def get_anvil_vcf_list(request, namespace, name, workspace_meta): +def _get_workspace_files(request, namespace, name, workspace_meta): bucket_name = workspace_meta['workspace']['bucketName'] bucket_path = 'gs://{bucket}'.format(bucket=bucket_name.rstrip('/')) - data_path_list = [path.replace(bucket_path, '') for path in get_gs_file_list(bucket_path, request.user) - if path.endswith(VCF_FILE_EXTENSIONS)] + return bucket_path, get_gs_file_list(bucket_path, request.user) + + 
+@anvil_workspace_access_required(meta_fields=['workspace.bucketName']) +def get_anvil_vcf_list(*args): + bucket_path, file_list = _get_workspace_files(*args) + data_path_list = [path.replace(bucket_path, '') for path in file_list if path.endswith(VCF_FILE_EXTENSIONS)] data_path_list = _merge_sharded_vcf(data_path_list) return create_json_response({'dataPathList': data_path_list}) +@anvil_workspace_access_required(meta_fields=['workspace.bucketName']) +def get_anvil_igv_options(*args): + bucket_path, file_list = _get_workspace_files(*args) + igv_options = [ + {'name': path.replace(bucket_path, ''), 'value': path} for path in file_list + if path.endswith(IgvSample.SAMPLE_TYPE_FILE_EXTENSIONS[IgvSample.SAMPLE_TYPE_ALIGNMENT]) + ] + + return create_json_response({'igv_options': igv_options}) + + @anvil_workspace_access_required(meta_fields=['workspace.bucketName']) def validate_anvil_vcf(request, namespace, name, workspace_meta): body = json.loads(request.body) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index c53354a447..0ef01440e4 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -38,7 +38,7 @@ def _process_alignment_records(rows, num_id_cols=1, **kwargs): sample_id = None index_file_path = None if len(row) > num_cols: - if file_path.endswith(GCNV_FILE_EXTENSIONS): + if file_path.endswith(IgvSample.SAMPLE_TYPE_FILE_EXTENSIONS[IgvSample.SAMPLE_TYPE_GCNV]): sample_id = row[num_cols] else: index_file_path = row[num_cols] @@ -140,17 +140,6 @@ def _get_valid_matched_individuals(individual_dataset_mapping): return _process_igv_table_handler(_parse_uploaded_file, _get_valid_matched_individuals) -SAMPLE_TYPE_MAP = [ - ('bam', IgvSample.SAMPLE_TYPE_ALIGNMENT), - ('cram', IgvSample.SAMPLE_TYPE_ALIGNMENT), - ('bigWig', IgvSample.SAMPLE_TYPE_COVERAGE), - ('junctions.bed.gz', IgvSample.SAMPLE_TYPE_JUNCTION), - ('bed.gz', IgvSample.SAMPLE_TYPE_GCNV), -] - -GCNV_FILE_EXTENSIONS = tuple(ext for ext, sample_type in SAMPLE_TYPE_MAP 
if sample_type == IgvSample.SAMPLE_TYPE_GCNV) - - @login_and_policies_required def update_individual_igv_sample(request, individual_guid): individual = Individual.objects.get(guid=individual_guid) @@ -167,10 +156,10 @@ def update_individual_igv_sample(request, individual_guid): if not file_path: raise ValueError('request must contain fields: filePath') - sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_path.endswith(suffix)), None) + sample_type = next((st for st, suffixes in IgvSample.SAMPLE_TYPE_FILE_EXTENSIONS.items() if file_path.endswith(suffixes)), None) if not sample_type: raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format( - file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) + file_path, ', '.join([suffix for suffixes in IgvSample.SAMPLE_TYPE_FILE_EXTENSIONS.values() for suffix in suffixes]))) if not does_file_exist(file_path, user=user): raise Exception('Error accessing "{}"'.format(file_path)) if request_json.get('indexFilePath') and not does_file_exist(request_json['indexFilePath'], user=user): diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index 2fd420bb70..0d70e51b35 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -502,19 +502,23 @@ const EDIT_INDIVIDUAL_FIELDS = [INDIVIDUAL_FIELD_SEX, INDIVIDUAL_FIELD_AFFECTED] { ...field, component: connect(mapParentOptionsStateToProps)(Select), inline: true, width: 8 } ))) -const mapIgvOptionsStateToProps = state => ({ - url: `/api/project/${getCurrentProject(state).projectGuid}/get_igv_options`, -}) +const mapIgvOptionsStateToProps = (state) => { + const { namespace, name } = getCurrentProject(state) + return { + url: `/api/anvil_workspace/${namespace}/${name}/get_igv_options`, + } +} const EDIT_IGV_FIELDS = [ { name: 'filePath', label: 'IGV File Path', component: 
connect(mapIgvOptionsStateToProps)(LoadOptionsSelect), - optionsResponseKey: 'groups', + optionsResponseKey: 'igv_options', + formatOption: value => value, errorHeader: 'Unable to Load IGV Files', - validationErrorHeader: 'No User Groups Available', - validationErrorMessage: 'Contact your system administrator to have them configure user groups', + validationErrorHeader: 'No IGV Files Found', + validationErrorMessage: 'No BAMs or CRAMs were found in the workspace associated with this project', validate: validators.required, }, ] From 8bd80b48e7a9d5ad56b924ef019e1ec0d775e733 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 12:50:15 -0400 Subject: [PATCH 627/736] add missing import --- seqr/views/apis/anvil_workspace_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py index 7d5caf9b39..49fb942c2a 100644 --- a/seqr/views/apis/anvil_workspace_api.py +++ b/seqr/views/apis/anvil_workspace_api.py @@ -13,7 +13,7 @@ from django.shortcuts import redirect from reference_data.models import GENOME_VERSION_LOOKUP -from seqr.models import Project, CAN_EDIT, Sample, Individual +from seqr.models import Project, CAN_EDIT, Sample, Individual, IgvSample from seqr.views.react_app import render_app_html from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE from seqr.utils.search.constants import VCF_FILE_EXTENSIONS From c121287f1710ce4a4aa9f21f00776ed88ab94c9b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 12:58:45 -0400 Subject: [PATCH 628/736] test get igv options --- seqr/views/apis/anvil_workspace_api_tests.py | 27 ++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index 9f7048e1ae..089940fd99 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ 
b/seqr/views/apis/anvil_workspace_api_tests.py @@ -7,7 +7,7 @@ from seqr.models import Project, Family, Individual from seqr.views.apis.anvil_workspace_api import anvil_workspace_page, create_project_from_workspace, \ - validate_anvil_vcf, grant_workspace_access, add_workspace_data, get_anvil_vcf_list + validate_anvil_vcf, grant_workspace_access, add_workspace_data, get_anvil_vcf_list, get_anvil_igv_options from seqr.views.utils.test_utils import AnvilAuthenticationTestCase, AuthenticationTestCase, AirflowTestCase, \ TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME, TEST_WORKSPACE_NAME1, TEST_NO_PROJECT_WORKSPACE_NAME, TEST_NO_PROJECT_WORKSPACE_NAME2 from seqr.views.utils.terra_api_utils import remove_token, TerraAPIException, TerraRefreshTokenFailedException @@ -424,9 +424,26 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_ @mock.patch('seqr.utils.file_utils.logger') @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_get_anvil_vcf_list(self, mock_subprocess, mock_file_logger, mock_utils_logger): + def test_get_anvil_igv_options(self, *args): + # Requesting to load data from a workspace without an existing project + url = reverse(get_anvil_igv_options, args=[TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME1]) + expected_options = [ + {'name': '/test.bam', 'value': 'gs://test_bucket/test.bam'}, + {'name': '/data/test.cram', 'value': 'gs://test_bucket/data/test.cram'}, + ] + self._test_get_workspace_files(url, 'igv_options', expected_options, *args) + + @mock.patch('seqr.utils.file_utils.logger') + @mock.patch('seqr.utils.file_utils.subprocess.Popen') + def test_get_anvil_vcf_list(self, *args): # Requesting to load data from a workspace without an existing project url = reverse(get_anvil_vcf_list, args=[TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME1]) + expected_files = [ + '/test.vcf', '/data/test.vcf.gz', '/data/test-101.vcf.gz', '/data/test-102.vcf.gz', '/sharded/test-*.vcf.gz', + ] + self._test_get_workspace_files(url, 
'dataPathList', expected_files, *args) + + def _test_get_workspace_files(self, url, response_key, expected_files, mock_subprocess, mock_file_logger, mock_utils_logger): self.check_manager_login(url, login_redirect_url='/login/google-oauth2') mock_utils_logger.warning.assert_called_with('User does not have sufficient permissions for workspace {}/{}' .format(TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME1), @@ -436,7 +453,7 @@ def test_get_anvil_vcf_list(self, mock_subprocess, mock_file_logger, mock_utils_ mock_subprocess.return_value.communicate.return_value = b'', None response = self.client.get(url, content_type='application/json') self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'dataPathList': []}) + self.assertDictEqual(response.json(), {response_key: []}) mock_subprocess.assert_called_with('gsutil ls gs://test_bucket', stdout=-1, stderr=-1, shell=True) # nosec mock_file_logger.info.assert_called_with('==> gsutil ls gs://test_bucket', self.manager_user) @@ -446,6 +463,7 @@ def test_get_anvil_vcf_list(self, mock_subprocess, mock_file_logger, mock_utils_ mock_subprocess.return_value.communicate.return_value = b'\n'.join([ b'Warning: some packages are out of date', b'gs://test_bucket/test.vcf', b'gs://test_bucket/test.tsv', + b'gs://test_bucket/test.bam', b'gs://test_bucket/test.bam.bai', b'gs://test_bucket/data/test.cram', # path with common prefix but not sharded VCFs b'gs://test_bucket/data/test.vcf.gz', b'gs://test_bucket/data/test-101.vcf.gz', b'gs://test_bucket/data/test-102.vcf.gz', @@ -455,8 +473,7 @@ def test_get_anvil_vcf_list(self, mock_subprocess, mock_file_logger, mock_utils_ ]), None response = self.client.get(url, content_type='application/json') self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'dataPathList': ['/test.vcf', '/data/test.vcf.gz', '/data/test-101.vcf.gz', - '/data/test-102.vcf.gz', '/sharded/test-*.vcf.gz']}) + self.assertDictEqual(response.json(), {response_key: 
expected_files}) mock_subprocess.assert_has_calls([ mock.call('gsutil ls gs://test_bucket', stdout=-1, stderr=-1, shell=True), # nosec mock.call().communicate(), From 7476f11dbf542593328974e4dde6ecc90e882905 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 13:23:56 -0400 Subject: [PATCH 629/736] fix mocking for test --- seqr/views/apis/igv_api_tests.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/igv_api_tests.py b/seqr/views/apis/igv_api_tests.py index d01138c303..2bf245109d 100644 --- a/seqr/views/apis/igv_api_tests.py +++ b/seqr/views/apis/igv_api_tests.py @@ -15,15 +15,15 @@ @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') +@mock.patch('seqr.utils.file_utils.subprocess.Popen') class IgvAPITest(AnvilAuthenticationTestCase): fixtures = ['users', '1kg_project'] @responses.activate @mock.patch('seqr.utils.file_utils.logger') - @mock.patch('seqr.utils.file_utils.subprocess.Popen') @mock.patch('seqr.views.apis.igv_api.safe_redis_get_json') @mock.patch('seqr.views.apis.igv_api.safe_redis_set_json') - def test_proxy_google_to_igv(self, mock_set_redis, mock_get_redis, mock_subprocess, mock_file_logger): + def test_proxy_google_to_igv(self, mock_set_redis, mock_get_redis, mock_file_logger, mock_subprocess): mock_ls_subprocess = mock.MagicMock() mock_access_token_subprocess = mock.MagicMock() mock_subprocess.side_effect = [mock_ls_subprocess, mock_access_token_subprocess] @@ -75,7 +75,6 @@ def test_proxy_google_to_igv(self, mock_set_redis, mock_get_redis, mock_subproce mock_set_redis.assert_not_called() mock_subprocess.assert_not_called() - @mock.patch('seqr.utils.file_utils.subprocess.Popen') @mock.patch('seqr.utils.file_utils.open') def test_proxy_local_to_igv(self, mock_open, mock_subprocess): mock_subprocess.return_value.stdout = STREAMING_READS_CONTENT @@ -97,7 +96,8 @@ def test_proxy_local_to_igv(self, mock_open, mock_subprocess): self.assertListEqual([val for val 
in response.streaming_content], STREAMING_READS_CONTENT) mock_open.assert_called_with('/project_A/sample_1.bai', 'rb') - def test_receive_alignment_table_handler(self): + def test_receive_alignment_table_handler(self, mock_subprocess): + mock_subprocess.return_value.wait.return_value = 0 url = reverse(receive_igv_table_handler, args=[PROJECT_GUID]) self.check_pm_login(url) @@ -134,7 +134,8 @@ def test_receive_alignment_table_handler(self): self.assertEqual(response.status_code, 200) @mock.patch('seqr.views.apis.igv_api.load_uploaded_file') - def test_receive_bulk_alignment_table_handler(self, mock_load_uploaded_file): + def test_receive_bulk_alignment_table_handler(self, mock_load_uploaded_file, mock_subprocess): + mock_subprocess.return_value.wait.return_value = 0 url = reverse(receive_bulk_igv_table_handler) self.check_pm_login(url) @@ -199,7 +200,7 @@ def test_receive_bulk_alignment_table_handler(self, mock_load_uploaded_file): {'individualGuid': 'I000018_na21234', 'individualId': 'NA21234', 'filePath': 'gs://readviz/NA21234.cram', 'indexFilePath': None, 'sampleId': None} ]) - @mock.patch('seqr.utils.file_utils.subprocess.Popen') + @mock.patch('seqr.utils.file_utils.os.path.isfile') def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): url = reverse(update_individual_igv_sample, args=['I000001_na19675']) @@ -307,7 +308,7 @@ def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): self.assertEqual(response.status_code, 200) @responses.activate - def test_igv_genomes_proxy(self): + def test_igv_genomes_proxy(self, mock_subprocess): url_path = 'igv.org.genomes/foo?query=true' s3_url = reverse(igv_genomes_proxy, args=['s3', url_path]) From 64041a2c4fc8e6584b36028f57bd6cd7d2ef2772 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 13:31:32 -0400 Subject: [PATCH 630/736] fix js tests --- ui/pages/Project/fixtures.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ui/pages/Project/fixtures.js 
b/ui/pages/Project/fixtures.js index c7e3df65c8..964f5625ca 100644 --- a/ui/pages/Project/fixtures.js +++ b/ui/pages/Project/fixtures.js @@ -282,6 +282,7 @@ export const STATE_WITH_2_FAMILIES = { caseReviewStatusLastModifiedDate: '2016-12-06T10:28:00.000Z', createdDate: '2016-12-06T10:28:00.000Z', sampleGuids: ['S2310656_wal_mc16200_mc16203'], + igvSampleGuids: ['IS2310656_wal_mc16200_mc16203'], sex: 'F', }, I021475_na19675_2: { @@ -325,6 +326,14 @@ export const STATE_WITH_2_FAMILIES = { sampleType: "WES", }, }, + igvSamplesByGuid: { + IS2310656_wal_mc16200_mc16203: { + projectGuid: 'R0237_1000_genomes_demo', + individualGuid: 'I021476_na19678_2', + sampleGuid: 'IS2310656_wal_mc16200_mc16203', + filePath: 'gs://seqr-datasets/GRCh37/cmg_sankaran_wes/CMG_MYOSEQ_MC16203.cram', + }, + }, analysisGroupsByGuid: { AG0000183_test_group: { analysisGroupGuid: "AG0000183_test_group", From b8dbf5c94869bccec8e95eb95466b992f2e72234 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 14:38:29 -0400 Subject: [PATCH 631/736] better align fixture permission for anvil and local --- seqr/fixtures/1kg_project.json | 2 +- seqr/views/apis/igv_api_tests.py | 2 +- seqr/views/apis/project_api_tests.py | 12 ++++++------ seqr/views/utils/test_utils.py | 8 +------- 4 files changed, 9 insertions(+), 15 deletions(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 3c46e3ad04..afb1fa5064 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -36,7 +36,7 @@ "description": "", "consent_code": "H", "workspace_name": "empty", - "workspace_namespace": "my-seqr-billing", + "workspace_namespace": "ext-data", "subscribers": 6, "can_edit_group": 2, "can_view_group": 3, diff --git a/seqr/views/apis/igv_api_tests.py b/seqr/views/apis/igv_api_tests.py index 2bf245109d..ae4d3f90de 100644 --- a/seqr/views/apis/igv_api_tests.py +++ b/seqr/views/apis/igv_api_tests.py @@ -17,7 +17,7 @@ 
@mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') @mock.patch('seqr.utils.file_utils.subprocess.Popen') class IgvAPITest(AnvilAuthenticationTestCase): - fixtures = ['users', '1kg_project'] + fixtures = ['users', 'social_auth', '1kg_project'] @responses.activate @mock.patch('seqr.utils.file_utils.logger') diff --git a/seqr/views/apis/project_api_tests.py b/seqr/views/apis/project_api_tests.py index 7fd5c541a0..9dc4a4053f 100644 --- a/seqr/views/apis/project_api_tests.py +++ b/seqr/views/apis/project_api_tests.py @@ -16,7 +16,7 @@ PROJECT_FIELDS, LOCUS_LIST_FIELDS, PA_LOCUS_LIST_FIELDS, NO_INTERNAL_CASE_REVIEW_INDIVIDUAL_FIELDS, \ SAMPLE_FIELDS, SUMMARY_FAMILY_FIELDS, INTERNAL_INDIVIDUAL_FIELDS, INDIVIDUAL_FIELDS, TAG_TYPE_FIELDS, \ FAMILY_NOTE_FIELDS, MATCHMAKER_SUBMISSION_FIELDS, ANALYSIS_GROUP_FIELDS, \ - EXT_WORKSPACE_NAMESPACE, EXT_WORKSPACE_NAME, DYNAMIC_ANALYSIS_GROUP_FIELDS + EXT_WORKSPACE_NAMESPACE, TEST_EMPTY_PROJECT_WORKSPACE, DYNAMIC_ANALYSIS_GROUP_FIELDS PROJECT_GUID = 'R0001_1kg' EMPTY_PROJECT_GUID = 'R0002_empty' @@ -28,7 +28,7 @@ 'name': 'new_project', 'description': 'new project description', 'genomeVersion': '38', 'isDemo': True, 'disableMme': True, 'consentCode': 'H', } -WORKSPACE_JSON = {'workspaceName': EXT_WORKSPACE_NAME, 'workspaceNamespace': EXT_WORKSPACE_NAMESPACE} +WORKSPACE_JSON = {'workspaceName': TEST_EMPTY_PROJECT_WORKSPACE, 'workspaceNamespace': EXT_WORKSPACE_NAMESPACE} WORKSPACE_CREATE_PROJECT_JSON = deepcopy(WORKSPACE_JSON) WORKSPACE_CREATE_PROJECT_JSON.update(BASE_CREATE_PROJECT_JSON) @@ -206,13 +206,13 @@ def test_update_project_workspace(self): response_json = response.json() self.assertSetEqual(set(response_json.keys()), PROJECT_FIELDS) - self.assertEqual(response_json['workspaceName'], EXT_WORKSPACE_NAME) + self.assertEqual(response_json['workspaceName'], TEST_EMPTY_PROJECT_WORKSPACE) self.assertEqual(response_json['workspaceNamespace'], EXT_WORKSPACE_NAMESPACE) 
self.assertEqual(response_json['genomeVersion'], '37') self.assertNotEqual(response_json['description'], 'updated project description') project = Project.objects.get(guid=PROJECT_GUID) - self.assertEqual(project.workspace_name, EXT_WORKSPACE_NAME) + self.assertEqual(project.workspace_name, TEST_EMPTY_PROJECT_WORKSPACE) self.assertEqual(project.workspace_namespace, EXT_WORKSPACE_NAMESPACE) def test_project_page_data(self): @@ -716,7 +716,7 @@ def test_create_and_delete_project(self, *args, **kwargs): mock.call(self.pm_user)]) self.mock_get_ws_access_level.assert_has_calls([ mock.call(self.pm_user, 'bar', 'foo'), - mock.call(self.pm_user, 'ext-data', 'anvil-non-analyst-project 1000 Genomes Demo'), + mock.call(self.pm_user, 'ext-data', 'empty'), ]) def _assert_expected_airtable_requests(self, mock_airtable_logger): @@ -764,7 +764,7 @@ def test_project_overview(self): super(AnvilProjectAPITest, self).test_project_overview() self.mock_list_workspaces.assert_not_called() self.assert_no_extra_anvil_calls() - self.mock_get_ws_access_level.assert_called_with(self.collaborator_user, 'my-seqr-billing', 'empty') + self.mock_get_ws_access_level.assert_called_with(self.collaborator_user, 'ext-data', 'empty') self.assertEqual(self.mock_get_ws_access_level.call_count, 4) def test_project_collaborators(self): diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 568c68734b..6e79acc07f 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -374,7 +374,7 @@ def assert_no_logs(self): 'bucketName': 'test_bucket' }, }, { - 'workspace_namespace': TEST_WORKSPACE_NAMESPACE, + 'workspace_namespace': EXT_WORKSPACE_NAMESPACE, 'workspace_name': TEST_EMPTY_PROJECT_WORKSPACE, 'public': False, 'acl': { @@ -424,12 +424,6 @@ def assert_no_logs(self): "canShare": True, "canCompute": True }, - 'test_pm_user@test.com': { - "accessLevel": "WRITER", - "pending": False, - "canShare": False, - "canCompute": False - }, }, 'workspace': { 
'authorizationDomain': [], From cb76afac53b331d1cdc7d3e67b339fb4bad4671f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 14:48:02 -0400 Subject: [PATCH 632/736] fix test --- seqr/views/apis/saved_variant_api_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 0f5ecf40d4..2a6b34972f 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -1011,7 +1011,7 @@ def test_saved_variant_data(self, *args): super(AnvilSavedVariantAPITest, self).test_saved_variant_data(*args) self.mock_list_workspaces.assert_called_with(self.analyst_user) self.mock_get_ws_access_level.assert_called_with( - mock.ANY, 'my-seqr-billing', 'empty') + mock.ANY, 'ext-data', 'empty') self.mock_get_ws_access_level.assert_any_call( mock.ANY, 'my-seqr-billing', 'anvil-1kg project n\u00e5me with uni\u00e7\u00f8de') self.assertEqual(self.mock_get_ws_access_level.call_count, 17) From 5df53395889e62c0d1ba13b3a514f5e5c69b9f83 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 14:57:55 -0400 Subject: [PATCH 633/736] filter gene symbol mapping to genes defiend in build --- panelapp/panelapp_utils.py | 3 ++- seqr/utils/gene_utils.py | 11 +++++++---- seqr/utils/search/utils.py | 27 +++++++++++++++++---------- seqr/views/apis/individual_api.py | 2 +- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/panelapp/panelapp_utils.py b/panelapp/panelapp_utils.py index 2d3a19874f..dd9fb18bd8 100644 --- a/panelapp/panelapp_utils.py +++ b/panelapp/panelapp_utils.py @@ -7,6 +7,7 @@ from urllib3.exceptions import MaxRetryError from panelapp.models import PaLocusList, PaLocusListGene +from reference_data.models import GENOME_VERSION_GRCh38 from seqr.models import LocusList as SeqrLocusList, LocusListGene as SeqrLocusListGene from seqr.utils.gene_utils import parse_locus_list_items from seqr.utils.logging_utils import 
SeqrLogger @@ -47,7 +48,7 @@ def _extract_ensembl_id_from_json(raw_gene_json): panel_genes_by_id = {_extract_ensembl_id_from_json(gene): gene for gene in all_genes_for_panel if _extract_ensembl_id_from_json(gene)} raw_ensbl_38_gene_ids_csv = ','.join(panel_genes_by_id.keys()) - genes_by_id, _, invalid_items = parse_locus_list_items({'rawItems': raw_ensbl_38_gene_ids_csv}) + genes_by_id, _, invalid_items = parse_locus_list_items({'rawItems': raw_ensbl_38_gene_ids_csv}, genome_version=GENOME_VERSION_GRCh38) if len(invalid_items) > 0: logger.warning('Genes found in panel {} but not in reference data, ignoring genes {}' .format(panel_app_id, invalid_items), user) diff --git a/seqr/utils/gene_utils.py b/seqr/utils/gene_utils.py index c590b888fc..f4ecc9da9f 100644 --- a/seqr/utils/gene_utils.py +++ b/seqr/utils/gene_utils.py @@ -40,8 +40,11 @@ def _get_genes(gene_ids, user=None, gene_fields=None): return {gene['geneId']: gene for gene in _get_json_for_genes(genes, user=user, gene_fields=gene_fields)} -def get_gene_ids_for_gene_symbols(gene_symbols): - genes = GeneInfo.objects.filter(gene_symbol__in=gene_symbols).only('gene_symbol', 'gene_id').order_by('-gencode_release') +def get_gene_ids_for_gene_symbols(gene_symbols, genome_version=None): + gene_filter = {'gene_symbol__in': gene_symbols} + if genome_version: + gene_filter[f'start_grch{genome_version}__isnull'] = False + genes = GeneInfo.objects.filter(**gene_filter).only('gene_symbol', 'gene_id').order_by('-gencode_release') symbols_to_ids = defaultdict(list) for gene in genes: symbols_to_ids[gene.gene_symbol].append(gene.gene_id) @@ -150,7 +153,7 @@ def _process_result(result, gene): return _get_json_for_models(genes, process_result=_process_result) -def parse_locus_list_items(request_json): +def parse_locus_list_items(request_json, genome_version=None): raw_items = request_json.get('rawItems') if not raw_items: return None, None, None @@ -185,7 +188,7 @@ def parse_locus_list_items(request_json): else: 
gene_symbols.add(item.replace('', '')) - gene_symbols_to_ids = get_gene_ids_for_gene_symbols(gene_symbols) + gene_symbols_to_ids = get_gene_ids_for_gene_symbols(gene_symbols, genome_version=genome_version) invalid_items += [symbol for symbol in gene_symbols if not gene_symbols_to_ids.get(symbol)] gene_ids.update({gene_ids[0] for gene_ids in gene_symbols_to_ids.values() if len(gene_ids)}) genes_by_id = get_genes(list(gene_ids)) if gene_ids else {} diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index 5cb4972743..5fdfbb4d45 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -82,7 +82,7 @@ def get_search_samples(projects, active_only=True): return _get_filtered_search_samples({'individual__family__project__in': projects}, active_only=active_only) -def _get_families_search_data(families, dataset_type=None): +def _get_families_search_data(families, dataset_type): samples = _get_filtered_search_samples({'individual__family__in': families}) if len(samples) < 1: raise InvalidSearchException('No search data found for families {}'.format( @@ -93,7 +93,11 @@ def _get_families_search_data(families, dataset_type=None): if not samples: raise InvalidSearchException(f'Unable to search against dataset type "{dataset_type}"') - projects = Project.objects.filter(family__individual__sample__in=samples).values_list('genome_version', 'name').distinct() + return samples + + +def _get_search_genome_version(families): + projects = Project.objects.filter(family__in=families).values_list('genome_version', 'name').distinct() project_versions = defaultdict(set) for genome_version, project_name in projects: project_versions[genome_version].add(project_name) @@ -104,7 +108,7 @@ def _get_families_search_data(families, dataset_type=None): raise InvalidSearchException( f'Searching across multiple genome builds is not supported. 
Remove projects with differing genome builds from search: {summary}') - return samples, next(iter(project_versions.keys())) + return next(iter(project_versions.keys())) def delete_search_backend_data(data_id): @@ -145,7 +149,8 @@ def _get_variants_for_variant_ids(families, variant_ids, user, user_email=None, dataset_type = _variant_ids_dataset_type(parsed_variant_ids.values()) return backend_specific_call(get_es_variants_for_variant_ids, get_hail_variants_for_variant_ids)( - *_get_families_search_data(families, dataset_type=dataset_type), parsed_variant_ids, user, user_email=user_email, **kwargs + _get_families_search_data(families, dataset_type=dataset_type), _get_search_genome_version(families), + parsed_variant_ids, user, user_email=user_email, **kwargs ) @@ -174,7 +179,8 @@ def variant_lookup(user, parsed_variant_id, **kwargs): def sv_variant_lookup(user, variant_id, families, **kwargs): - samples, _ = _get_families_search_data(families, dataset_type=Sample.DATASET_TYPE_SV_CALLS) + _get_search_genome_version(families) + samples = _get_families_search_data(families, dataset_type=Sample.DATASET_TYPE_SV_CALLS) return _variant_lookup( hail_sv_variant_lookup, user, variant_id, **kwargs, samples=samples, cache_key_suffix=user, dataset_type=Sample.DATASET_TYPE_SV_CALLS, @@ -233,10 +239,14 @@ def query_variants(search_model, sort=XPOS_SORT_KEY, skip_genotype_filter=False, def _query_variants(search_model, user, previous_search_results, sort=None, num_results=100, **kwargs): search = deepcopy(search_model.variant_search.search) + families = search_model.families.all() + genome_version = _get_search_genome_version(families) + _validate_sort(sort, families) + rs_ids = None variant_ids = None parsed_variant_ids = None - genes, intervals, invalid_items = parse_locus_list_items(search.get('locus', {})) + genes, intervals, invalid_items = parse_locus_list_items(search.get('locus', {}), genome_version=genome_version) if invalid_items: raise InvalidSearchException('Invalid 
genes/intervals: {}'.format(', '.join(invalid_items))) if not (genes or intervals): @@ -257,9 +267,6 @@ def _query_variants(search_model, user, previous_search_results, sort=None, num_ } parsed_search.update(search) - families = search_model.families.all() - _validate_sort(sort, families) - dataset_type, secondary_dataset_type, lookup_dataset_type = _search_dataset_type(parsed_search) parsed_search.update({'dataset_type': dataset_type, 'secondary_dataset_type': secondary_dataset_type}) search_dataset_type = None @@ -269,7 +276,7 @@ def _query_variants(search_model, user, previous_search_results, sort=None, num_ elif dataset_type == Sample.DATASET_TYPE_SV_CALLS: search_dataset_type = DATASET_TYPE_NO_MITO - samples, genome_version = _get_families_search_data(families, dataset_type=search_dataset_type) + samples = _get_families_search_data(families, dataset_type=search_dataset_type) if parsed_search.get('inheritance'): samples = _parse_inheritance(parsed_search, samples) diff --git a/seqr/views/apis/individual_api.py b/seqr/views/apis/individual_api.py index 86afdfd3eb..d815f8cd3f 100644 --- a/seqr/views/apis/individual_api.py +++ b/seqr/views/apis/individual_api.py @@ -895,7 +895,7 @@ def import_gregor_metadata(request, project_guid): genes.add(variant[GENE_COLUMN]) finding_id_map[variant['genetic_findings_id']] = variant_id - gene_symbols_to_ids = {k: v[0] for k, v in get_gene_ids_for_gene_symbols(genes).items()} + gene_symbols_to_ids = {k: v[0] for k, v in get_gene_ids_for_gene_symbols(genes, genome_version=project.genome_version).items()} missing_genes = set() for variant in family_variant_data.values(): gene = variant[GENE_COLUMN] From 7cbea0313975669928b350ea83a77c39c2a4b7d1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 15:00:24 -0400 Subject: [PATCH 634/736] fix test order --- seqr/utils/search/search_utils_tests.py | 31 +++++++++++++------------ 1 file changed, 16 insertions(+), 15 deletions(-) diff --git 
a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index 909bc48c6d..431a3de035 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -156,6 +156,22 @@ def _test_invalid_search_params(self, search_func): query_variants(self.results_model, user=self.user, page=200) self.assertEqual(str(cm.exception), 'Unable to load more than 10000 variants (20000 requested)') + self.search_model.search['locus'] = {'rawVariantItems': 'chr2-A-C'} + with self.assertRaises(InvalidSearchException) as cm: + search_func(self.results_model, user=self.user) + self.assertEqual(str(cm.exception), 'Invalid variants: chr2-A-C') + + self.search_model.search['locus']['rawVariantItems'] = 'rs9876,chr2-1234-A-C' + with self.assertRaises(InvalidSearchException) as cm: + search_func(self.results_model, user=self.user) + self.assertEqual(str(cm.exception), 'Invalid variant notation: found both variant IDs and rsIDs') + + self.search_model.search['locus']['rawItems'] = 'chr27:1234-5678,2:40-400000000, ENSG00012345' + with self.assertRaises(InvalidSearchException) as cm: + search_func(self.results_model, user=self.user) + self.assertEqual(str(cm.exception), 'Invalid genes/intervals: chr27:1234-5678, chr2:40-400000000, ENSG00012345') + + self.search_model.search['locus'] = {} self.search_model.search['inheritance'] = {'mode': 'recessive'} with self.assertRaises(InvalidSearchException) as cm: query_variants(self.results_model) @@ -222,21 +238,6 @@ def _test_invalid_search_params(self, search_func): 'Searching across multiple genome builds is not supported. 
Remove projects with differing genome builds from search: 37 - 1kg project nåme with uniçøde, Test Reprocessed Project; 38 - Non-Analyst Project', ) - self.search_model.search['locus'] = {'rawVariantItems': 'chr2-A-C'} - with self.assertRaises(InvalidSearchException) as cm: - search_func(self.results_model, user=self.user) - self.assertEqual(str(cm.exception), 'Invalid variants: chr2-A-C') - - self.search_model.search['locus']['rawVariantItems'] = 'rs9876,chr2-1234-A-C' - with self.assertRaises(InvalidSearchException) as cm: - search_func(self.results_model, user=self.user) - self.assertEqual(str(cm.exception), 'Invalid variant notation: found both variant IDs and rsIDs') - - self.search_model.search['locus']['rawItems'] = 'chr27:1234-5678,2:40-400000000, ENSG00012345' - with self.assertRaises(InvalidSearchException) as cm: - search_func(self.results_model, user=self.user) - self.assertEqual(str(cm.exception), 'Invalid genes/intervals: chr27:1234-5678, chr2:40-400000000, ENSG00012345') - def test_invalid_search_query_variants(self): with self.assertRaises(InvalidSearchException) as se: query_variants(self.results_model, sort='prioritized_gene', num_results=2) From cc29f61b30e03da4f78a75a38487c7daeccc012f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 15:12:47 -0400 Subject: [PATCH 635/736] tests search will only run on genes with coordinates for the current build --- seqr/fixtures/reference_data.json | 4 ++-- seqr/utils/search/search_utils_tests.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json index 6fd43023c2..e42fcc72b6 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -426,8 +426,8 @@ "gene_id": "ENSG00000233653", "gene_symbol": "CICP7", "chrom_grch37": "1", - "start_grch37": 329431, - "end_grch37": 332236, + "start_grch37": null, + "end_grch37": null, "strand_grch37": "-", "coding_region_size_grch37": 
0, "chrom_grch38": "1", diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index 431a3de035..8000d4bf1b 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -171,6 +171,12 @@ def _test_invalid_search_params(self, search_func): search_func(self.results_model, user=self.user) self.assertEqual(str(cm.exception), 'Invalid genes/intervals: chr27:1234-5678, chr2:40-400000000, ENSG00012345') + build_specific_genes = 'CICP7, OR4F29' + self.search_model.search['locus']['rawItems'] = build_specific_genes + with self.assertRaises(InvalidSearchException) as cm: + search_func(self.results_model, user=self.user) + self.assertEqual(str(cm.exception), 'Invalid genes/intervals: CICP7') + self.search_model.search['locus'] = {} self.search_model.search['inheritance'] = {'mode': 'recessive'} with self.assertRaises(InvalidSearchException) as cm: @@ -238,6 +244,12 @@ def _test_invalid_search_params(self, search_func): 'Searching across multiple genome builds is not supported. 
Remove projects with differing genome builds from search: 37 - 1kg project nåme with uniçøde, Test Reprocessed Project; 38 - Non-Analyst Project', ) + self.results_model.families.set(Family.objects.filter(guid='F000014_14')) + self.search_model.search['locus']['rawItems'] = build_specific_genes + with self.assertRaises(InvalidSearchException) as cm: + search_func(self.results_model, user=self.user) + self.assertEqual(str(cm.exception), 'Invalid genes/intervals: OR4F29') + def test_invalid_search_query_variants(self): with self.assertRaises(InvalidSearchException) as se: query_variants(self.results_model, sort='prioritized_gene', num_results=2) From 5cb6a158a05a8b9c7e673aef437ede03793f1d92 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 15:19:52 -0400 Subject: [PATCH 636/736] only use ensembl ids with matched genome version --- seqr/utils/gene_utils.py | 18 ++++++++++++------ seqr/utils/search/search_utils_tests.py | 6 +++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/seqr/utils/gene_utils.py b/seqr/utils/gene_utils.py index f4ecc9da9f..84247b2904 100644 --- a/seqr/utils/gene_utils.py +++ b/seqr/utils/gene_utils.py @@ -16,8 +16,8 @@ def get_gene(gene_id, user): return gene_json -def get_genes(gene_ids): - return _get_genes(gene_ids) +def get_genes(gene_ids, genome_version=None): + return _get_genes(gene_ids, genome_version=genome_version) def get_genes_for_variant_display(gene_ids): @@ -32,18 +32,24 @@ def get_genes_with_detail(gene_ids, user): return _get_genes(gene_ids, user=user, gene_fields=ALL_GENE_FIELDS) -def _get_genes(gene_ids, user=None, gene_fields=None): +# TODO all usages? 
+def _get_genes(gene_ids, user=None, gene_fields=None, genome_version=None): gene_filter = {} + _add_genome_version_filter(gene_filter, genome_version) if gene_ids is not None: gene_filter['gene_id__in'] = gene_ids genes = GeneInfo.objects.filter(**gene_filter) return {gene['geneId']: gene for gene in _get_json_for_genes(genes, user=user, gene_fields=gene_fields)} -def get_gene_ids_for_gene_symbols(gene_symbols, genome_version=None): - gene_filter = {'gene_symbol__in': gene_symbols} +def _add_genome_version_filter(gene_filter, genome_version): if genome_version: gene_filter[f'start_grch{genome_version}__isnull'] = False + + +def get_gene_ids_for_gene_symbols(gene_symbols, genome_version=None): + gene_filter = {'gene_symbol__in': gene_symbols} + _add_genome_version_filter(gene_filter, genome_version) genes = GeneInfo.objects.filter(**gene_filter).only('gene_symbol', 'gene_id').order_by('-gencode_release') symbols_to_ids = defaultdict(list) for gene in genes: @@ -191,6 +197,6 @@ def parse_locus_list_items(request_json, genome_version=None): gene_symbols_to_ids = get_gene_ids_for_gene_symbols(gene_symbols, genome_version=genome_version) invalid_items += [symbol for symbol in gene_symbols if not gene_symbols_to_ids.get(symbol)] gene_ids.update({gene_ids[0] for gene_ids in gene_symbols_to_ids.values() if len(gene_ids)}) - genes_by_id = get_genes(list(gene_ids)) if gene_ids else {} + genes_by_id = get_genes(list(gene_ids), genome_version=genome_version) if gene_ids else {} invalid_items += [gene_id for gene_id in gene_ids if not genes_by_id.get(gene_id)] return genes_by_id, intervals, invalid_items \ No newline at end of file diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index 8000d4bf1b..f1ee0c53a8 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -171,11 +171,11 @@ def _test_invalid_search_params(self, search_func): search_func(self.results_model, user=self.user) 
self.assertEqual(str(cm.exception), 'Invalid genes/intervals: chr27:1234-5678, chr2:40-400000000, ENSG00012345') - build_specific_genes = 'CICP7, OR4F29' + build_specific_genes = 'CICP7, OR4F29, ENSG00000233653, ENSG00000256186' self.search_model.search['locus']['rawItems'] = build_specific_genes with self.assertRaises(InvalidSearchException) as cm: search_func(self.results_model, user=self.user) - self.assertEqual(str(cm.exception), 'Invalid genes/intervals: CICP7') + self.assertEqual(str(cm.exception), 'Invalid genes/intervals: CICP7, ENSG00000233653') self.search_model.search['locus'] = {} self.search_model.search['inheritance'] = {'mode': 'recessive'} @@ -248,7 +248,7 @@ def _test_invalid_search_params(self, search_func): self.search_model.search['locus']['rawItems'] = build_specific_genes with self.assertRaises(InvalidSearchException) as cm: search_func(self.results_model, user=self.user) - self.assertEqual(str(cm.exception), 'Invalid genes/intervals: OR4F29') + self.assertEqual(str(cm.exception), 'Invalid genes/intervals: OR4F29, ENSG00000256186') def test_invalid_search_query_variants(self): with self.assertRaises(InvalidSearchException) as se: From 59e61fc85fdb40c247c21fc2f5a6ad430aa1106a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 15:36:56 -0400 Subject: [PATCH 637/736] use build specific genes where possible --- seqr/utils/gene_utils.py | 9 ++++----- seqr/views/apis/family_api.py | 2 +- seqr/views/apis/individual_api.py | 5 +++-- seqr/views/apis/variant_search_api.py | 5 +++-- seqr/views/utils/variant_utils.py | 11 +++++++---- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/seqr/utils/gene_utils.py b/seqr/utils/gene_utils.py index 84247b2904..06b2572981 100644 --- a/seqr/utils/gene_utils.py +++ b/seqr/utils/gene_utils.py @@ -20,19 +20,18 @@ def get_genes(gene_ids, genome_version=None): return _get_genes(gene_ids, genome_version=genome_version) -def get_genes_for_variant_display(gene_ids): - return _get_genes(gene_ids, 
gene_fields=VARIANT_GENE_DISPLAY_FIELDS) +def get_genes_for_variant_display(gene_ids, genome_version): + return _get_genes(gene_ids, gene_fields=VARIANT_GENE_DISPLAY_FIELDS, genome_version=genome_version) -def get_genes_for_variants(gene_ids): - return _get_genes(gene_ids, gene_fields=VARIANT_GENE_FIELDS) +def get_genes_for_variants(gene_ids, genome_version=None): + return _get_genes(gene_ids, gene_fields=VARIANT_GENE_FIELDS, genome_version=genome_version) def get_genes_with_detail(gene_ids, user): return _get_genes(gene_ids, user=user, gene_fields=ALL_GENE_FIELDS) -# TODO all usages? def _get_genes(gene_ids, user=None, gene_fields=None, genome_version=None): gene_filter = {} _add_genome_version_filter(gene_filter, genome_version) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index 322c23648f..5da3adfef1 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -523,5 +523,5 @@ def get_family_phenotype_gene_scores(request, family_guid): gene_ids = {gene_id for indiv in phenotype_prioritization.values() for gene_id in indiv.keys()} return create_json_response({ 'phenotypeGeneScores': phenotype_prioritization, - 'genesById': get_genes_for_variant_display(gene_ids) + 'genesById': get_genes_for_variant_display(gene_ids, project.genome_version), }) diff --git a/seqr/views/apis/individual_api.py b/seqr/views/apis/individual_api.py index d815f8cd3f..b6d82c7b3a 100644 --- a/seqr/views/apis/individual_api.py +++ b/seqr/views/apis/individual_api.py @@ -965,7 +965,8 @@ def _parse_participant_val(column, value, participant_sample_lookup): @login_and_policies_required def get_individual_rna_seq_data(request, individual_guid): individual = Individual.objects.get(guid=individual_guid) - check_project_permissions(individual.family.project, request.user) + project = individual.family.project + check_project_permissions(project, request.user) filters = {'sample__individual': individual} outlier_data = 
get_json_for_rna_seq_outliers(filters, significant_only=False, individual_guid=individual_guid) @@ -973,7 +974,7 @@ def get_individual_rna_seq_data(request, individual_guid): genes_to_show = get_genes({ gene_id for rna_data in outlier_data.get(individual_guid, {}).values() for gene_id, data in rna_data.items() if any([d['isSignificant'] for d in (data if isinstance(data, list) else [data])]) - }) + }, genome_version=project.genome_version) return create_json_response({ 'rnaSeqData': outlier_data, diff --git a/seqr/views/apis/variant_search_api.py b/seqr/views/apis/variant_search_api.py index 0d9cdac9c2..5d97ee7913 100644 --- a/seqr/views/apis/variant_search_api.py +++ b/seqr/views/apis/variant_search_api.py @@ -250,12 +250,12 @@ def _get_variant_main_transcript_field_val(parsed_variant): @login_and_policies_required def get_variant_gene_breakdown(request, search_hash): results_model = VariantSearchResults.objects.get(search_hash=search_hash) - _check_results_permission(results_model, request.user) + projects = _check_results_permission(results_model, request.user) gene_counts = get_variant_query_gene_counts(results_model, user=request.user) return create_json_response({ 'searchGeneBreakdown': {search_hash: gene_counts}, - 'genesById': get_genes_for_variant_display(list(gene_counts.keys())), + 'genesById': get_genes_for_variant_display(list(gene_counts.keys()), projects.first().genome_version), }) @@ -479,6 +479,7 @@ def _check_results_permission(results_model, user, project_perm_check=None): for project in projects: if not project_perm_check(project): raise PermissionDenied() + return projects def _get_search_context(results_model): diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 9a2bcad48b..aaf65a129b 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -269,7 +269,11 @@ def _saved_variant_genes_transcripts(variants): for family_guid in var['familyGuids']: 
family_genes[family_guid].update(var.get('transcripts', {}).keys()) - genes = get_genes_for_variants(gene_ids) + projects = Project.objects.filter(family__guid__in=family_genes.keys()).distinct() + genome_versions = {p.genome_version for p in projects} + genome_version = list(genome_versions)[0] if len(genome_versions) == 1 else None + + genes = get_genes_for_variants(gene_ids, genome_version=genome_version) for gene in genes.values(): if gene: gene['locusListGuids'] = [] @@ -281,7 +285,7 @@ def _saved_variant_genes_transcripts(variants): ) } if transcript_ids else None - return genes, transcripts, family_genes + return genes, transcripts, family_genes, projects def get_omim_intervals_query(variants): @@ -386,9 +390,8 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a if not variants: return response - genes, transcripts, family_genes = _saved_variant_genes_transcripts(variants) + genes, transcripts, family_genes, projects = _saved_variant_genes_transcripts(variants) - projects = Project.objects.filter(family__guid__in=family_genes.keys()).distinct() project = list(projects)[0] if len(projects) == 1 else None discovery_tags = None From 9d7796830f22a850bc1d601f220b22ca1e78fe26 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 16:11:41 -0400 Subject: [PATCH 638/736] test genes not returned in variant context --- seqr/fixtures/reference_data.json | 10 +++++----- seqr/utils/search/elasticsearch/es_utils_tests.py | 6 +++--- seqr/utils/search/search_utils_tests.py | 10 +++++----- seqr/views/apis/saved_variant_api_tests.py | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json index e42fcc72b6..57993ea7fc 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -5,9 +5,9 @@ "fields": { "gene_id": "ENSG00000223972", "gene_symbol": "DDX11L1", - "chrom_grch37": "1", - "start_grch37": 11869, - "end_grch37": 
14409, + "chrom_grch37": null, + "start_grch37": null, + "end_grch37": null, "strand_grch37": "+", "coding_region_size_grch37": 0, "chrom_grch38": "1", @@ -426,8 +426,8 @@ "gene_id": "ENSG00000233653", "gene_symbol": "CICP7", "chrom_grch37": "1", - "start_grch37": null, - "end_grch37": null, + "start_grch37": 329431, + "end_grch37": 332236, "strand_grch37": "-", "coding_region_size_grch37": 0, "chrom_grch38": "1", diff --git a/seqr/utils/search/elasticsearch/es_utils_tests.py b/seqr/utils/search/elasticsearch/es_utils_tests.py index 0775a6ab45..f4a34b4c94 100644 --- a/seqr/utils/search/elasticsearch/es_utils_tests.py +++ b/seqr/utils/search/elasticsearch/es_utils_tests.py @@ -1460,7 +1460,7 @@ def test_invalid_get_es_variants(self, mock_logger): results_model.families.set(self.families) search_model.search = { 'inheritance': {'mode': 'compound_het'}, - 'locus': {'rawItems': 'DDX11L1'}, + 'locus': {'rawItems': 'WASH7P'}, 'annotations': {'frameshift': ['frameshift_variant']}, } search_model.save() @@ -1603,7 +1603,7 @@ def test_filtered_get_es_variants(self): 'in_silico': {'cadd': '11.5', 'sift': 'D', 'fathmm': 'D'}, 'inheritance': {'mode': 'de_novo'}, 'customQuery': {'term': {'customFlag': 'flagVal'}}, - 'locus': {'rawItems': 'DDX11L1, chr2:1234-5678, chr7:100-10100%10', 'excludeLocations': True}, + 'locus': {'rawItems': 'WASH7P, chr2:1234-5678, chr7:100-10100%10', 'excludeLocations': True}, }) results_model = VariantSearchResults.objects.create(variant_search=search_model) @@ -1626,7 +1626,7 @@ def test_filtered_get_es_variants(self): {'range': {'xpos': {'gte': 2000000001}}}, {'range': {'xstop': {'lte': 2300000000}}}, ]}}, - {'terms': {'geneIds': ['ENSG00000223972']}}, + {'terms': {'geneIds': ['ENSG00000227232']}}, {'bool': {'must': [ {'range': {'xpos': {'gte': 7000000001, 'lte': 7000001100}}}, {'range': {'xstop': {'gte': 7000009100, 'lte': 7000011100}}}]}}, diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index 
f1ee0c53a8..3bd88e0903 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -171,11 +171,11 @@ def _test_invalid_search_params(self, search_func): search_func(self.results_model, user=self.user) self.assertEqual(str(cm.exception), 'Invalid genes/intervals: chr27:1234-5678, chr2:40-400000000, ENSG00012345') - build_specific_genes = 'CICP7, OR4F29, ENSG00000233653, ENSG00000256186' + build_specific_genes = 'DDX11L1, OR4F29, ENSG00000223972, ENSG00000256186' self.search_model.search['locus']['rawItems'] = build_specific_genes with self.assertRaises(InvalidSearchException) as cm: search_func(self.results_model, user=self.user) - self.assertEqual(str(cm.exception), 'Invalid genes/intervals: CICP7, ENSG00000233653') + self.assertEqual(str(cm.exception), 'Invalid genes/intervals: DDX11L1, ENSG00000223972') self.search_model.search['locus'] = {} self.search_model.search['inheritance'] = {'mode': 'recessive'} @@ -345,12 +345,12 @@ def _mock_get_variants(families, search, user, previous_search_results, genome_v search_fields=['locus'], rs_ids=['rs9876'], variant_ids=[], parsed_variant_ids=[], ) - self.search_model.search['locus']['rawItems'] = 'DDX11L1, chr2:1234-5678, chr7:100-10100%10, ENSG00000186092' + self.search_model.search['locus']['rawItems'] = 'WASH7P, chr2:1234-5678, chr7:100-10100%10, ENSG00000186092' query_variants(self.results_model, user=self.user) self._test_expected_search_call( mock_get_variants, results_cache, sort='xpos', page=1, num_results=100, skip_genotype_filter=False, search_fields=['locus'], genes={ - 'ENSG00000223972': mock.ANY, 'ENSG00000186092': mock.ANY, + 'ENSG00000227232': mock.ANY, 'ENSG00000186092': mock.ANY, }, intervals=[ {'chrom': '2', 'start': 1234, 'end': 5678, 'offset': None}, {'chrom': '7', 'start': 100, 'end': 10100, 'offset': 0.1}, @@ -359,7 +359,7 @@ def _mock_get_variants(families, search, user, previous_search_results, genome_v parsed_genes = 
mock_get_variants.call_args.args[1]['parsedLocus']['genes'] for gene in parsed_genes.values(): self.assertSetEqual(set(gene.keys()), GENE_FIELDS) - self.assertEqual(parsed_genes['ENSG00000223972']['geneSymbol'], 'DDX11L1') + self.assertEqual(parsed_genes['ENSG00000227232']['geneSymbol'], 'WASH7P') self.assertEqual(parsed_genes['ENSG00000186092']['geneSymbol'], 'OR4F5') self.search_model.search.update({'pathogenicity': {'clinvar': ['pathogenic', 'likely_pathogenic']}, 'locus': {}}) diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 0f5ecf40d4..bba1283feb 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -277,7 +277,7 @@ def test_saved_variant_data(self): self.assertSetEqual( set(response_json['savedVariantsByGuid'].keys()), {'SV0000006_1248367227_r0003_tes', 'SV0000007_prefix_19107_DEL_r00'}) - self.assertSetEqual(set(response_json['genesById'].keys()), {'ENSG00000135953', 'ENSG00000223972', 'ENSG00000240361'}) + self.assertSetEqual(set(response_json['genesById'].keys()), {'ENSG00000135953', 'ENSG00000240361'}) self.assertDictEqual(response_json['omimIntervals'], {'3': { 'chrom': '1', 'start': 249044482, From 2403ce02a1df69afed9231b96d7710c4bf780f12 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 16:31:27 -0400 Subject: [PATCH 639/736] clean up gene utilities tests --- seqr/utils/gene_utils_tests.py | 56 ------------------------- seqr/views/apis/gene_api_tests.py | 37 +++++++++++++--- seqr/views/apis/individual_api_tests.py | 3 +- 3 files changed, 33 insertions(+), 63 deletions(-) delete mode 100644 seqr/utils/gene_utils_tests.py diff --git a/seqr/utils/gene_utils_tests.py b/seqr/utils/gene_utils_tests.py deleted file mode 100644 index ad5944ea90..0000000000 --- a/seqr/utils/gene_utils_tests.py +++ /dev/null @@ -1,56 +0,0 @@ -from django.contrib.auth.models import User -from django.test import TestCase - -from seqr.utils.gene_utils import 
get_gene, get_genes, get_genes_for_variant_display, get_genes_for_variants, \ - get_genes_with_detail -from seqr.views.utils.test_utils import GENE_FIELDS, GENE_DETAIL_FIELDS, GENE_VARIANT_FIELDS, GENE_VARIANT_DISPLAY_FIELDS - -GENE_ID = 'ENSG00000223972' - -class GeneUtilsTest(TestCase): - databases = '__all__' - fixtures = ['reference_data'] - - def test_get_gene(self): - json = get_gene(GENE_ID, user=None) - self.assertSetEqual(set(json.keys()), GENE_DETAIL_FIELDS) - - def test_get_genes(self): - gene_ids = {GENE_ID, 'ENSG00000227232'} - user = User.objects.get(pk=1) - - json = get_genes(gene_ids) - self.assertSetEqual(set(json.keys()), gene_ids) - self.assertSetEqual(set(json[GENE_ID].keys()), GENE_FIELDS) - - json = get_genes_for_variant_display(gene_ids) - self.assertSetEqual(set(json.keys()), gene_ids) - self.assertSetEqual(set(json[GENE_ID].keys()), GENE_VARIANT_DISPLAY_FIELDS) - - json = get_genes_for_variants(gene_ids) - self.assertSetEqual(set(json.keys()), gene_ids) - self.assertSetEqual(set(json[GENE_ID].keys()), GENE_VARIANT_FIELDS) - - json = get_genes_with_detail(gene_ids, user) - self.assertSetEqual(set(json.keys()), gene_ids) - gene = json[GENE_ID] - self.assertSetEqual(set(gene.keys()), GENE_DETAIL_FIELDS) - - # test nested models - self.assertSetEqual(set(gene['primateAi'].keys()), {'percentile25', 'percentile75'}) - self.assertSetEqual( - set(gene['constraints'].keys()), {'misZ', 'misZRank', 'pli', 'pliRank', 'louef', 'louefRank', 'totalGenes'}) - self.assertSetEqual(set(gene['cnSensitivity'].keys()), {'phi', 'pts'}) - self.assertSetEqual( - set(gene['omimPhenotypes'][0].keys()), - {'mimNumber', 'phenotypeMimNumber', 'phenotypeDescription', 'phenotypeInheritance', 'chrom', 'start', 'end'}) - self.assertSetEqual(set(gene['genCc'].keys()), {'hgncId', 'classifications'}) - self.assertSetEqual(set(gene['clinGen'].keys()), {'haploinsufficiency', 'triplosensitivity', 'href'}) - - sparse_gene = json['ENSG00000227232'] - 
self.assertIsNone(sparse_gene['primateAi']) - self.assertDictEqual(sparse_gene['constraints'], {}) - self.assertDictEqual(sparse_gene['cnSensitivity'], {}) - self.assertListEqual(sparse_gene['omimPhenotypes'], []) - self.assertDictEqual(sparse_gene['genCc'], {}) - self.assertIsNone(sparse_gene['clinGen']) diff --git a/seqr/views/apis/gene_api_tests.py b/seqr/views/apis/gene_api_tests.py index 3ddc5db398..44e4b2e37d 100644 --- a/seqr/views/apis/gene_api_tests.py +++ b/seqr/views/apis/gene_api_tests.py @@ -27,14 +27,14 @@ def test_genes_info(self): url = reverse(genes_info) self.check_require_login(url) - response = self.client.get('{}?geneIds={},ENSG00000269981,foo'.format(url, GENE_ID)) + response = self.client.get('{}?geneIds={},ENSG00000269981,ENSG00000227232,foo'.format(url, GENE_ID)) self.assertEqual(response.status_code, 200) genes = response.json()['genesById'] - self.assertSetEqual(set(genes.keys()), {GENE_ID, 'ENSG00000269981'}) + self.assertSetEqual(set(genes.keys()), {GENE_ID, 'ENSG00000269981', 'ENSG00000227232'}) self.assertSetEqual(set(genes[GENE_ID].keys()), GENE_DETAIL_FIELDS) self.assertDictEqual(genes[GENE_ID], { - 'chromGrch37': '1', + 'chromGrch37': None, 'chromGrch38': '1', 'clinGen': {'haploinsufficiency': 'No Evidence', 'href': 'https://dosage.clinicalgenome.org/clingen_gene.cgi?sym=', 'triplosensitivity': ''}, 'cnSensitivity': {'phi': 0.90576, 'pts': 0.7346}, @@ -42,7 +42,7 @@ def test_genes_info(self): 'codingRegionSizeGrch38': 0, 'constraints': {'louef': 1.606, 'louefRank': 0, 'misZ': -0.7773, 'misZRank': 1, 'pli': 0.00090576, 'pliRank': 1, 'totalGenes': 1}, 'diseaseDesc': '', - 'endGrch37': 14409, + 'endGrch37': None, 'endGrch38': 14409, 'functionDesc': '', 'genCc': {'hgncId': 'HGNC:943', 'classifications': [ @@ -59,10 +59,35 @@ def test_genes_info(self): 'omimPhenotypes': [{'mimNumber': 147571, 'phenotypeDescription': 'Immunodeficiency 38', 'phenotypeInheritance': 'Autosomal recessive', 'phenotypeMimNumber': 616126, 'chrom': '1', 'start': 
11869, 'end': 14409}], 'primateAi': {'percentile25': 0.587214291096, 'percentile75': 0.821286439896}, 'sHet': {'postMean': 0.90576}, - 'startGrch37': 11869, + 'startGrch37': None, 'startGrch38': 11869, }) - + self.assertDictEqual(genes['ENSG00000227232'], { + 'chromGrch37': '1', + 'chromGrch38': '1', + 'clinGen': None, + 'cnSensitivity': {}, + 'codingRegionSizeGrch37': 0, + 'codingRegionSizeGrch38': 0, + 'constraints': {}, + 'diseaseDesc': '', + 'endGrch37': 29570, + 'endGrch38': 29570, + 'functionDesc': '', + 'genCc': {}, + 'gencodeGeneType': 'unprocessed_pseudogene', + 'geneId': 'ENSG00000227232', + 'geneNames': 'POR4F29;TTN', + 'geneSymbol': 'WASH7P', + 'mgiMarkerId': None, + 'mimNumber': None, + 'notes': [], + 'omimPhenotypes': [], + 'primateAi': None, + 'sHet': {}, + 'startGrch37': 14404, + 'startGrch38': 14404, + }) def test_create_update_and_delete_gene_note(self): create_gene_note_url = reverse(create_gene_note_handler, args=[GENE_ID]) diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py index 72f72df621..86114557e5 100644 --- a/seqr/views/apis/individual_api_tests.py +++ b/seqr/views/apis/individual_api_tests.py @@ -18,7 +18,7 @@ get_hpo_terms, get_individual_rna_seq_data, import_gregor_metadata from seqr.views.apis.report_api_tests import PARTICIPANT_TABLE, PHENOTYPE_TABLE, EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, GENETIC_FINDINGS_TABLE from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase, INDIVIDUAL_FIELDS, \ - INDIVIDUAL_CORE_FIELDS, CORE_INTERNAL_INDIVIDUAL_FIELDS + INDIVIDUAL_CORE_FIELDS, CORE_INTERNAL_INDIVIDUAL_FIELDS, GENE_FIELDS PROJECT_GUID = 'R0001_1kg' PM_REQUIRED_PROJECT_GUID = 'R0003_test' @@ -1293,6 +1293,7 @@ def test_get_individual_rna_seq_data(self): outliers_by_pos[132885746] ) self.assertSetEqual(set(response_json['genesById'].keys()), {'ENSG00000135953', 'ENSG00000268903'}) + self.assertSetEqual(set(response_json['genesById']['ENSG00000135953'].keys()), 
GENE_FIELDS) def test_get_individual_rna_seq_data_is_significant(self): url = reverse(get_individual_rna_seq_data, args=[INDIVIDUAL_GUID]) From 7bd400ea7abcc68bfa0257d94b688440eebc7efb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 20 Aug 2024 16:57:27 -0400 Subject: [PATCH 640/736] remvoe inaccurate comments --- seqr/views/apis/anvil_workspace_api_tests.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index 089940fd99..e45e29f093 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -425,7 +425,6 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_ @mock.patch('seqr.utils.file_utils.logger') @mock.patch('seqr.utils.file_utils.subprocess.Popen') def test_get_anvil_igv_options(self, *args): - # Requesting to load data from a workspace without an existing project url = reverse(get_anvil_igv_options, args=[TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME1]) expected_options = [ {'name': '/test.bam', 'value': 'gs://test_bucket/test.bam'}, @@ -436,7 +435,6 @@ def test_get_anvil_igv_options(self, *args): @mock.patch('seqr.utils.file_utils.logger') @mock.patch('seqr.utils.file_utils.subprocess.Popen') def test_get_anvil_vcf_list(self, *args): - # Requesting to load data from a workspace without an existing project url = reverse(get_anvil_vcf_list, args=[TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME1]) expected_files = [ '/test.vcf', '/data/test.vcf.gz', '/data/test-101.vcf.gz', '/data/test-102.vcf.gz', '/sharded/test-*.vcf.gz', From ea237b94ec4e7c5fe5a1dec17195d1c19fb70bd8 Mon Sep 17 00:00:00 2001 From: EddieLF <34049565+EddieLF@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:08:50 +1000 Subject: [PATCH 641/736] Update s3/igv.org.genomes URLs to gs/cpg-common-main/references (#237) * Update s3/igv.org.genomes reference URLs to gs/cpg-common-main/references * Fix comments * Fix 
cytoBand reference url path prefix --- .../components/panel/family/constants.js | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/ui/shared/components/panel/family/constants.js b/ui/shared/components/panel/family/constants.js index a5b870f9de..1907ecb248 100644 --- a/ui/shared/components/panel/family/constants.js +++ b/ui/shared/components/panel/family/constants.js @@ -80,6 +80,8 @@ export const IGV_OPTIONS = { const BASE_REFERENCE_URL = '/api/igv_genomes' const REFERENCE_URLS = [ + // Accessing some reference files from the s3/igv.org.genomes URLs has caused issues + // So we access them via CPG cloud storage instead (cytoBand, alias, Refseq) { key: 'fastaURL', baseUrl: BASE_REFERENCE_URL, @@ -90,18 +92,18 @@ const REFERENCE_URLS = [ }, { key: 'cytobandURL', - baseUrl: `${BASE_REFERENCE_URL}/s3`, + baseUrl: `${BASE_REFERENCE_URL}`, path: { - 37: 'igv.broadinstitute.org/genomes/seq/hg19/cytoBand.txt', - 38: 'igv.org.genomes/hg38/annotations/cytoBandIdeo.txt.gz', + 37: 's3/igv.broadinstitute.org/genomes/seq/hg19/cytoBand.txt', + 38: 'gs/cpg-common-main/references/igv_org_genomes/hg38/annotations/cytoBandIdeo.txt.gz', }, }, { key: 'aliasURL', - baseUrl: `${BASE_REFERENCE_URL}/s3/igv.org.genomes`, + baseUrl: `${BASE_REFERENCE_URL}`, path: { - 37: 'hg19/hg19_alias.tab', - 38: 'hg38/hg38_alias.tab', + 37: 's3/igv.org.genomes/hg19/hg19_alias.tab', + 38: 'gs/cpg-common-main/references/igv_org_genomes/hg38/hg38_alias.tab', }, }, ] @@ -121,10 +123,10 @@ const REFERENCE_TRACKS = [ { name: 'Refseq', indexPostfix: 'tbi', - baseUrl: `${BASE_REFERENCE_URL}/s3/igv.org.genomes`, + baseUrl: `${BASE_REFERENCE_URL}`, path: { - 37: 'hg19/refGene.sorted.txt.gz', - 38: 'hg38/refGene.sorted.txt.gz', + 37: 's3/igv.org.genomes/hg19/refGene.sorted.txt.gz', + 38: 'gs/cpg-common-main/references/igv_org_genomes/hg38/refGene.sorted.txt.gz', }, format: 'refgene', visibilityWindow: -1, From 60d08c86e6c7ca03435855ab0b0633e4b921207f Mon Sep 17 00:00:00 2001 
From: Hana Snow Date: Wed, 21 Aug 2024 11:35:49 -0400 Subject: [PATCH 642/736] use bulk update --- seqr/views/utils/airtable_utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index eb1a4f8d1b..11cfd6c126 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -11,6 +11,7 @@ PAGE_SIZE = 100 MAX_OR_FILTERS = PAGE_SIZE - 5 +MAX_UPDATE_RECORDS = 10 ANVIL_REQUEST_TRACKING_TABLE = 'AnVIL Seqr Loading Requests Tracking' @@ -49,13 +50,13 @@ def safe_create_record(self, record_type, record): def safe_patch_records(self, record_type, record_or_filters, record_and_filters, update, max_records=PAGE_SIZE - 1): try: - self._patch_record(record_type, record_or_filters, record_and_filters, update, max_records) + self._patch_records(record_type, record_or_filters, record_and_filters, update, max_records) except Exception as e: logger.error(f'Airtable patch "{record_type}" error: {e}', self._user, detail={ 'or_filters': record_or_filters, 'and_filters': record_and_filters, 'update': update, }) - def _patch_record(self, record_type, record_or_filters, record_and_filters, update, max_records): + def _patch_records(self, record_type, record_or_filters, record_and_filters, update, max_records): records = self.fetch_records( record_type, fields=record_or_filters.keys(), or_filters=record_or_filters, and_filters=record_and_filters, page_size=max_records+1, @@ -65,9 +66,11 @@ def _patch_record(self, record_type, record_or_filters, record_and_filters, upda self._session.params = {} errors = [] - for record_id in records.keys(): + record_ids = list(records.keys()) + for i in range(0, len(records), MAX_UPDATE_RECORDS): + update_chunk = [{'id': record_id, 'fields': update} for record_id in record_ids[i:i + MAX_UPDATE_RECORDS]] try: - response = self._session.patch(f'{self._url}/{record_type}/{record_id}', json={'fields': update}) + response = 
self._session.patch(f'{self._url}/{record_type}', json={'records': update_chunk}) response.raise_for_status() except Exception as e: errors.append(str(e)) From 664a1c461230b21026b9316fab939f7191762db9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 21 Aug 2024 11:50:38 -0400 Subject: [PATCH 643/736] pdo status samples helper --- .../check_for_new_samples_from_pipeline.py | 5 +++++ seqr/views/apis/data_manager_api.py | 20 +++++-------------- seqr/views/apis/data_manager_api_tests.py | 1 + seqr/views/utils/airtable_utils.py | 18 ++++++++++++++++- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 5666046029..7d8ef1bfbd 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -13,6 +13,7 @@ from seqr.utils.search.add_data_utils import notify_search_data_loaded from seqr.utils.search.utils import parse_valid_variant_id from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type +from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.dataset_utils import match_and_update_search_samples from seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ get_saved_variants @@ -91,6 +92,9 @@ def handle(self, *args, **options): # Reset cached results for all projects, as seqr AFs will have changed for all projects when new data is added reset_cached_search_results(project=None) + # Update Airtable PDOs + session = AirtableSession(user) + # Send loading notifications update_sample_data_by_project = { s['individual__family__project']: s for s in updated_samples.values('individual__family__project').annotate( @@ -111,6 +115,7 @@ def handle(self, *args, **options): updated_project_families.append((project.id, project.name, 
project.genome_version, project_families)) # Send failure notifications + # TODO include new PDO names failed_family_samples = metadata.get('failed_family_samples', {}) failed_families_by_guid = {f['guid']: f for f in Family.objects.filter( guid__in={family for families in failed_family_samples.values() for family in families} diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 1ad54f5d99..460f188482 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -22,7 +22,7 @@ from seqr.utils.vcf_utils import validate_vcf_exists from seqr.views.utils.airflow_utils import trigger_data_loading, write_data_loading_pedigree -from seqr.views.utils.airtable_utils import AirtableSession +from seqr.views.utils.airtable_utils import AirtableSession, LOADABLE_PDO_STATUSES from seqr.views.utils.dataset_utils import load_rna_seq, load_phenotype_prioritization_data_file, RNA_DATA_TYPE_CONFIGS, \ post_process_rna_data from seqr.views.utils.file_utils import parse_file, get_temp_file_path, load_uploaded_file, persist_temp_file @@ -447,10 +447,6 @@ def write_pedigree(request, project_guid): Sample.DATASET_TYPE_SV_CALLS: ('.bed', '.bed.gz'), } -LOADABLE_PDO_STATUSES = [ - 'On hold for phenotips, but ready to load', - 'Methods (Loading)', -] AVAILABLE_PDO_STATUSES = { 'Available in seqr', 'Historic', @@ -593,16 +589,10 @@ def _get_valid_project_samples(project_samples, sample_type, user): def _get_loaded_samples(project_samples, user): - sample_ids = [sample_id for _, sample_id in project_samples] - samples_by_id = AirtableSession(user).get_samples_for_sample_ids(sample_ids, ['PDOStatus', 'SeqrProject']) - return [(project, sample_id) for project, sample_id in project_samples if any( - _is_loaded_airtable_sample(s, project) for s in samples_by_id.get(sample_id, []) - )] - - -def _is_loaded_airtable_sample(sample, project_guid): - return f'{BASE_URL}project/{project_guid}/project_page' in sample['SeqrProject'] 
and any( - status in AVAILABLE_PDO_STATUSES for status in sample['PDOStatus']) + return [ + (project, sample_id) for project, sample_id, _ in + AirtableSession(user).get_project_samples_with_status(project_samples, AVAILABLE_PDO_STATUSES) + ] # Hop-by-hop HTTP response headers shouldn't be forwarded. diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 68fbcf59d2..c71cc4a2cd 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1589,6 +1589,7 @@ def _get_dag_variable_overrides(*args, **kwargs): } @responses.activate + @mock.patch('seqr.views.utils.airtable_utils.BASE_URL', 'https://seqr.broadinstitute.org/') @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', 'https://seqr.broadinstitute.org/') @mock.patch('seqr.views.utils.export_utils.open') @mock.patch('seqr.views.utils.export_utils.TemporaryDirectory') diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index 11cfd6c126..c4c2d7a09a 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -5,7 +5,7 @@ from seqr.utils.logging_utils import SeqrLogger from seqr.views.utils.terra_api_utils import is_google_authenticated -from settings import AIRTABLE_API_KEY, AIRTABLE_URL +from settings import AIRTABLE_API_KEY, AIRTABLE_URL, BASE_URL logger = SeqrLogger(__name__) @@ -15,6 +15,11 @@ ANVIL_REQUEST_TRACKING_TABLE = 'AnVIL Seqr Loading Requests Tracking' +LOADABLE_PDO_STATUSES = [ + 'On hold for phenotips, but ready to load', + 'Methods (Loading)', +] + class AirtableSession(object): @@ -125,3 +130,14 @@ def get_samples_for_sample_ids(self, sample_ids, fields): if missing: records_by_id.update(self._get_samples_for_id_field(missing, 'SeqrCollaboratorSampleID', fields)) return records_by_id + + def get_project_samples_with_status(self, project_samples, pdo_statuses): + sample_ids = [sample_id for _, sample_id in project_samples] + samples_by_id = 
self.get_samples_for_sample_ids(sample_ids, ['PDOStatus', 'SeqrProject']) + matched_samples = [(project, sample_id, [ + s for s in samples_by_id.get(sample_id, []) + if f'{BASE_URL}project/{project}/project_page' in s['SeqrProject'] and any( + status in pdo_statuses for status in s['PDOStatus'] + ) + ]) for project, sample_id in project_samples] + return [match for match in matched_samples if match[2]] From cabe21acfe6382ade70c4091053ba9b55cb898e3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 21 Aug 2024 11:53:12 -0400 Subject: [PATCH 644/736] Revert "pdo status samples helper" This reverts commit 664a1c461230b21026b9316fab939f7191762db9. --- .../check_for_new_samples_from_pipeline.py | 5 ----- seqr/views/apis/data_manager_api.py | 20 ++++++++++++++----- seqr/views/apis/data_manager_api_tests.py | 1 - seqr/views/utils/airtable_utils.py | 18 +---------------- 4 files changed, 16 insertions(+), 28 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 7d8ef1bfbd..5666046029 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -13,7 +13,6 @@ from seqr.utils.search.add_data_utils import notify_search_data_loaded from seqr.utils.search.utils import parse_valid_variant_id from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type -from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.dataset_utils import match_and_update_search_samples from seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ get_saved_variants @@ -92,9 +91,6 @@ def handle(self, *args, **options): # Reset cached results for all projects, as seqr AFs will have changed for all projects when new data is added reset_cached_search_results(project=None) - # Update Airtable PDOs - session = 
AirtableSession(user) - # Send loading notifications update_sample_data_by_project = { s['individual__family__project']: s for s in updated_samples.values('individual__family__project').annotate( @@ -115,7 +111,6 @@ def handle(self, *args, **options): updated_project_families.append((project.id, project.name, project.genome_version, project_families)) # Send failure notifications - # TODO include new PDO names failed_family_samples = metadata.get('failed_family_samples', {}) failed_families_by_guid = {f['guid']: f for f in Family.objects.filter( guid__in={family for families in failed_family_samples.values() for family in families} diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 460f188482..1ad54f5d99 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -22,7 +22,7 @@ from seqr.utils.vcf_utils import validate_vcf_exists from seqr.views.utils.airflow_utils import trigger_data_loading, write_data_loading_pedigree -from seqr.views.utils.airtable_utils import AirtableSession, LOADABLE_PDO_STATUSES +from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.dataset_utils import load_rna_seq, load_phenotype_prioritization_data_file, RNA_DATA_TYPE_CONFIGS, \ post_process_rna_data from seqr.views.utils.file_utils import parse_file, get_temp_file_path, load_uploaded_file, persist_temp_file @@ -447,6 +447,10 @@ def write_pedigree(request, project_guid): Sample.DATASET_TYPE_SV_CALLS: ('.bed', '.bed.gz'), } +LOADABLE_PDO_STATUSES = [ + 'On hold for phenotips, but ready to load', + 'Methods (Loading)', +] AVAILABLE_PDO_STATUSES = { 'Available in seqr', 'Historic', @@ -589,10 +593,16 @@ def _get_valid_project_samples(project_samples, sample_type, user): def _get_loaded_samples(project_samples, user): - return [ - (project, sample_id) for project, sample_id, _ in - AirtableSession(user).get_project_samples_with_status(project_samples, AVAILABLE_PDO_STATUSES) - ] + 
sample_ids = [sample_id for _, sample_id in project_samples] + samples_by_id = AirtableSession(user).get_samples_for_sample_ids(sample_ids, ['PDOStatus', 'SeqrProject']) + return [(project, sample_id) for project, sample_id in project_samples if any( + _is_loaded_airtable_sample(s, project) for s in samples_by_id.get(sample_id, []) + )] + + +def _is_loaded_airtable_sample(sample, project_guid): + return f'{BASE_URL}project/{project_guid}/project_page' in sample['SeqrProject'] and any( + status in AVAILABLE_PDO_STATUSES for status in sample['PDOStatus']) # Hop-by-hop HTTP response headers shouldn't be forwarded. diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index c71cc4a2cd..68fbcf59d2 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1589,7 +1589,6 @@ def _get_dag_variable_overrides(*args, **kwargs): } @responses.activate - @mock.patch('seqr.views.utils.airtable_utils.BASE_URL', 'https://seqr.broadinstitute.org/') @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', 'https://seqr.broadinstitute.org/') @mock.patch('seqr.views.utils.export_utils.open') @mock.patch('seqr.views.utils.export_utils.TemporaryDirectory') diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index c4c2d7a09a..11cfd6c126 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -5,7 +5,7 @@ from seqr.utils.logging_utils import SeqrLogger from seqr.views.utils.terra_api_utils import is_google_authenticated -from settings import AIRTABLE_API_KEY, AIRTABLE_URL, BASE_URL +from settings import AIRTABLE_API_KEY, AIRTABLE_URL logger = SeqrLogger(__name__) @@ -15,11 +15,6 @@ ANVIL_REQUEST_TRACKING_TABLE = 'AnVIL Seqr Loading Requests Tracking' -LOADABLE_PDO_STATUSES = [ - 'On hold for phenotips, but ready to load', - 'Methods (Loading)', -] - class AirtableSession(object): @@ -130,14 +125,3 @@ def 
get_samples_for_sample_ids(self, sample_ids, fields): if missing: records_by_id.update(self._get_samples_for_id_field(missing, 'SeqrCollaboratorSampleID', fields)) return records_by_id - - def get_project_samples_with_status(self, project_samples, pdo_statuses): - sample_ids = [sample_id for _, sample_id in project_samples] - samples_by_id = self.get_samples_for_sample_ids(sample_ids, ['PDOStatus', 'SeqrProject']) - matched_samples = [(project, sample_id, [ - s for s in samples_by_id.get(sample_id, []) - if f'{BASE_URL}project/{project}/project_page' in s['SeqrProject'] and any( - status in pdo_statuses for status in s['PDOStatus'] - ) - ]) for project, sample_id in project_samples] - return [match for match in matched_samples if match[2]] From 212f2a9b9df7c721129793fd35c79944f3f5a45b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 21 Aug 2024 11:55:28 -0400 Subject: [PATCH 645/736] totods --- .../commands/check_for_new_samples_from_pipeline.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 5666046029..c32c01d52f 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -13,6 +13,7 @@ from seqr.utils.search.add_data_utils import notify_search_data_loaded from seqr.utils.search.utils import parse_valid_variant_id from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type +from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.dataset_utils import match_and_update_search_samples from seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ get_saved_variants @@ -91,6 +92,10 @@ def handle(self, *args, **options): # Reset cached results for all projects, as seqr AFs will have changed for all projects when new data is added 
reset_cached_search_results(project=None) + # Update Airtable PDOs + # TODO + session = AirtableSession(user) + # Send loading notifications update_sample_data_by_project = { s['individual__family__project']: s for s in updated_samples.values('individual__family__project').annotate( @@ -111,6 +116,7 @@ def handle(self, *args, **options): updated_project_families.append((project.id, project.name, project.genome_version, project_families)) # Send failure notifications + # TODO include new PDO names failed_family_samples = metadata.get('failed_family_samples', {}) failed_families_by_guid = {f['guid']: f for f in Family.objects.filter( guid__in={family for families in failed_family_samples.values() for family in families} From 2e3e0b474a0020aa9d351aec98da2df1272410a4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 21 Aug 2024 15:44:49 -0400 Subject: [PATCH 646/736] update loaded PDOs --- .../check_for_new_samples_from_pipeline.py | 62 ++++++++++++++++--- seqr/views/apis/anvil_workspace_api.py | 6 +- seqr/views/apis/data_manager_api.py | 8 +-- seqr/views/utils/airtable_utils.py | 59 +++++++++++------- 4 files changed, 98 insertions(+), 37 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index c32c01d52f..a4589b2f82 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -13,7 +13,7 @@ from seqr.utils.search.add_data_utils import notify_search_data_loaded from seqr.utils.search.utils import parse_valid_variant_id from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type -from seqr.views.utils.airtable_utils import AirtableSession +from seqr.views.utils.airtable_utils import AirtableSession, LOADABLE_PDO_STATUSES, AVAILABLE_PDO_STATUS from seqr.views.utils.dataset_utils import match_and_update_search_samples from 
seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ get_saved_variants @@ -26,6 +26,12 @@ USER_EMAIL = 'manage_command' MAX_LOOKUP_VARIANTS = 5000 +PDO_COPY_FIELDS = [ + 'PDO', 'PDOStatus', 'SeqrLoadingDate', 'GATKShortReadCallsetPath', 'SeqrProjectURL', 'TerraProjectURL', + 'SequencingProduct', 'PDOName', 'SequencingSubmissionDate', 'SequencingCompletionDate', 'CallsetRequestedDate', + 'CallsetCompletionDate', 'Project', 'Metrics Checked', 'gCNV_SV_CallsetPath', 'DRAGENShortReadCallsetPath', +] + class Command(BaseCommand): help = 'Check for newly loaded seqr samples' @@ -92,11 +98,7 @@ def handle(self, *args, **options): # Reset cached results for all projects, as seqr AFs will have changed for all projects when new data is added reset_cached_search_results(project=None) - # Update Airtable PDOs - # TODO - session = AirtableSession(user) - - # Send loading notifications + # Send loading notifications and update Airtable PDOs update_sample_data_by_project = { s['individual__family__project']: s for s in updated_samples.values('individual__family__project').annotate( samples=ArrayAgg(JSONObject(sample_id='sample_id', individual_id='individual_id')), @@ -105,6 +107,8 @@ def handle(self, *args, **options): } updated_project_families = [] updated_families = set() + split_project_pdos = {} + session = AirtableSession(user) for project, sample_ids in samples_by_project.items(): project_sample_data = update_sample_data_by_project[project.id] notify_search_data_loaded( @@ -114,9 +118,9 @@ def handle(self, *args, **options): project_families = project_sample_data['family_guids'] updated_families.update(project_families) updated_project_families.append((project.id, project.name, project.genome_version, project_families)) + split_project_pdos[project] = self._update_pdos(session, project, sample_ids) # Send failure notifications - # TODO include new PDO names failed_family_samples = metadata.get('failed_family_samples', 
{}) failed_families_by_guid = {f['guid']: f for f in Family.objects.filter( guid__in={family for families in failed_family_samples.values() for family in families} @@ -130,6 +134,9 @@ def handle(self, *args, **options): ) for project, failures in failures_by_project.items(): summary = '\n'.join(sorted(failures)) + split_pdos = split_project_pdos.get(project) + if split_pdos: + summary += f'\n\nSkipped samples in this project have been moved to {", ".join(split_pdos)}' safe_post_to_slack( SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, f'The following {len(failures)} families failed {check.replace("_", " ")} in {project}:\n{summary}' @@ -144,6 +151,47 @@ def handle(self, *args, **options): logger.info('DONE') + @staticmethod + def _update_pdos(session, project, sample_ids): + airtable_samples = session.fetch_records( + 'Samples', fields=['CollaboratorSampleID', 'SeqrCollaboratorSampleID', 'PDOID'], + or_filters={'PDOStatus': LOADABLE_PDO_STATUSES}, + and_filters={'SeqrProject': f'{BASE_URL}project/{project}/project_page'} + ) + + pdo_ids = set() + skipped_pdo_samples = defaultdict(list) + for record_id, sample in airtable_samples.items(): + pdo_id = sample['PDOID'][0] + sample_id = sample.get('SeqrCollaboratorSampleID') or sample['CollaboratorSampleID'] + if sample_id in sample_ids: + pdo_ids.add(pdo_id) + else: + skipped_pdo_samples[pdo_id].append(record_id) + + if pdo_ids: + session.safe_patch_records_by_id('PDO', pdo_ids, {'PDOStatus': AVAILABLE_PDO_STATUS}) + + if not skipped_pdo_samples: + return [] + + pdos_to_create = { + f"{pdo.pop('PDO')}_sr": (record_id, pdo) for record_id, pdo in session.fetch_records( + 'PDO', fields=PDO_COPY_FIELDS, or_filters={'RECORD_ID()': list(skipped_pdo_samples.keys())} + ).items() + } + + # Create PDOs and then update Samples with new PDOs + # Does not create PDOs with Samples directly as that would not remove Samples from old PDOs + new_pdos = session.safe_create_records('PDO', [ + {'PDO': pdo_name, **pdo} for pdo_name, (_, pdo) in 
pdos_to_create.items() + ]) + pdo_id_map = {pdos_to_create[record['fields']['PDO']][0]: record['id'] for record in new_pdos} + for pdo_id, sample_record_ids in skipped_pdo_samples.items(): + session.safe_patch_records_by_id('Samples', sample_record_ids, {'PDOID': [pdo_id_map[pdo_id]]}) + + return sorted(pdos_to_create.keys()) + @staticmethod def _reload_shared_variant_annotations(data_type, genome_version, updated_variants_by_id=None, exclude_families=None): dataset_type = data_type.split('_')[0] diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py index 49fb942c2a..d22f7f07be 100644 --- a/seqr/views/apis/anvil_workspace_api.py +++ b/seqr/views/apis/anvil_workspace_api.py @@ -307,15 +307,15 @@ def _trigger_add_workspace_data(project, pedigree_records, user, data_path, samp SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, f'ERROR triggering AnVIL loading for project {project.guid}', genome_version=project.genome_version, ) - AirtableSession(user, base=AirtableSession.ANVIL_BASE).safe_create_record( - ANVIL_REQUEST_TRACKING_TABLE, { + AirtableSession(user, base=AirtableSession.ANVIL_BASE).safe_create_records( + ANVIL_REQUEST_TRACKING_TABLE, [{ 'Requester Name': user.get_full_name(), 'Requester Email': user.email, 'AnVIL Project URL': _get_seqr_project_url(project), 'Initial Request Date': datetime.now().strftime('%Y-%m-%d'), 'Number of Samples': len(sample_ids), 'Status': 'Loading' if trigger_success else 'Loading Requested' - }) + }]) loading_warning_date = ANVIL_LOADING_DELAY_EMAIL_START_DATE and datetime.strptime(ANVIL_LOADING_DELAY_EMAIL_START_DATE, '%Y-%m-%d') if loading_warning_date and loading_warning_date <= datetime.now(): diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 1ad54f5d99..b43920b07a 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -22,7 +22,7 @@ from seqr.utils.vcf_utils import validate_vcf_exists from 
seqr.views.utils.airflow_utils import trigger_data_loading, write_data_loading_pedigree -from seqr.views.utils.airtable_utils import AirtableSession +from seqr.views.utils.airtable_utils import AirtableSession, LOADABLE_PDO_STATUSES, AVAILABLE_PDO_STATUS from seqr.views.utils.dataset_utils import load_rna_seq, load_phenotype_prioritization_data_file, RNA_DATA_TYPE_CONFIGS, \ post_process_rna_data from seqr.views.utils.file_utils import parse_file, get_temp_file_path, load_uploaded_file, persist_temp_file @@ -447,12 +447,8 @@ def write_pedigree(request, project_guid): Sample.DATASET_TYPE_SV_CALLS: ('.bed', '.bed.gz'), } -LOADABLE_PDO_STATUSES = [ - 'On hold for phenotips, but ready to load', - 'Methods (Loading)', -] AVAILABLE_PDO_STATUSES = { - 'Available in seqr', + AVAILABLE_PDO_STATUS, 'Historic', } diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index 11cfd6c126..d173d9a36e 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -15,6 +15,12 @@ ANVIL_REQUEST_TRACKING_TABLE = 'AnVIL Seqr Loading Requests Tracking' +LOADABLE_PDO_STATUSES = [ + 'On hold for phenotips, but ready to load', + 'Methods (Loading)', +] +AVAILABLE_PDO_STATUS = 'Available in seqr' + class AirtableSession(object): @@ -41,42 +47,53 @@ def _check_user_access(self, base): if not has_access: raise PermissionDenied('Error: To access airtable user must login with Google authentication.') - def safe_create_record(self, record_type, record): - try: - response = self._session.post(f'{self._url}/{record_type}', json={'records': [{'fields': record}]}) - response.raise_for_status() - except Exception as e: - logger.error(f'Airtable create "{record_type}" error: {e}', self._user) + def safe_create_records(self, record_type, records): + return self._safe_bulk_update_records( + 'post', record_type, [{'fields': record} for record in records], error_detail=records, + ) def safe_patch_records(self, record_type, record_or_filters, 
record_and_filters, update, max_records=PAGE_SIZE - 1): + error_detail = { + 'or_filters': record_or_filters, 'and_filters': record_and_filters, 'update': update, + } try: - self._patch_records(record_type, record_or_filters, record_and_filters, update, max_records) + records = self.fetch_records( + record_type, fields=record_or_filters.keys(), or_filters=record_or_filters, + and_filters=record_and_filters, + page_size=max_records + 1, + ) + if not records or len(records) > max_records: + raise ValueError('Unable to identify record to update') + + self.safe_patch_records_by_id(record_type, list(records.keys()), update, error_detail=error_detail) except Exception as e: - logger.error(f'Airtable patch "{record_type}" error: {e}', self._user, detail={ - 'or_filters': record_or_filters, 'and_filters': record_and_filters, 'update': update, - }) - - def _patch_records(self, record_type, record_or_filters, record_and_filters, update, max_records): - records = self.fetch_records( - record_type, fields=record_or_filters.keys(), or_filters=record_or_filters, and_filters=record_and_filters, - page_size=max_records+1, + logger.error(f'Airtable patch "{record_type}" error: {e}', self._user, detail=error_detail) + + def safe_patch_records_by_id(self, record_type, record_ids, update, error_detail=None): + self._safe_bulk_update_records( + 'patch', record_type, [{'id': record_id, 'fields': update} for record_id in record_ids], + error_detail=error_detail or {'record_ids': record_ids, 'update': update}, ) - if not records or len(records) > max_records: - raise ValueError('Unable to identify record to update') + def _safe_bulk_update_records(self, update_type, record_type, records, error_detail=None): self._session.params = {} + update = getattr(self._session, update_type) errors = [] - record_ids = list(records.keys()) + records = [] for i in range(0, len(records), MAX_UPDATE_RECORDS): - update_chunk = [{'id': record_id, 'fields': update} for record_id in record_ids[i:i + 
MAX_UPDATE_RECORDS]] try: - response = self._session.patch(f'{self._url}/{record_type}', json={'records': update_chunk}) + response = update(f'{self._url}/{record_type}', json={'records': records[i:i + MAX_UPDATE_RECORDS]}) response.raise_for_status() + records += response.json()['records'] except Exception as e: errors.append(str(e)) if errors: - raise Exception(';'.join(errors)) + logger.error( + f'Airtable {update_type} "{record_type}" error: {";".join(errors)}', self._user, detail=error_detail, + ) + + return records def fetch_records(self, record_type, fields, or_filters, and_filters=None, page_size=PAGE_SIZE): self._session.params.update({'fields[]': fields, 'pageSize': page_size}) From a99450caa647fc1c4662b71263f3a3fa467aba05 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 21 Aug 2024 15:58:42 -0400 Subject: [PATCH 647/736] fix tests --- .../check_for_new_samples_from_pipeline.py | 2 +- seqr/views/apis/anvil_workspace_api_tests.py | 4 ++-- seqr/views/apis/project_api_tests.py | 16 +++++++--------- seqr/views/utils/airtable_utils.py | 1 - 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index a4589b2f82..80c67f82f7 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -108,7 +108,7 @@ def handle(self, *args, **options): updated_project_families = [] updated_families = set() split_project_pdos = {} - session = AirtableSession(user) + session = AirtableSession(user=None) for project, sample_ids in samples_by_project.items(): project_sample_data = update_sample_data_by_project[project.id] notify_search_data_loaded( diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index e45e29f093..a72939cac6 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ 
b/seqr/views/apis/anvil_workspace_api_tests.py @@ -854,8 +854,8 @@ def _test_mv_file_and_triggering_dag_exception(self, url, workspace, sample_data self.mock_airflow_logger.warning.assert_called_with( 'LOADING_PIPELINE DAG is running and cannot be triggered again.', self.manager_user) self.mock_airtable_logger.error.assert_called_with( - f'Airtable create "AnVIL Seqr Loading Requests Tracking" error: 400 Client Error: Bad Request for url: ' - f'{MOCK_AIRTABLE_URL}/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking', self.manager_user) + f'Airtable post "AnVIL Seqr Loading Requests Tracking" error: 400 Client Error: Bad Request for url: ' + f'{MOCK_AIRTABLE_URL}/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking', self.manager_user, detail=mock.ANY) slack_message_on_failure = """ERROR triggering AnVIL loading for project {guid}: LOADING_PIPELINE DAG is running and cannot be triggered again. diff --git a/seqr/views/apis/project_api_tests.py b/seqr/views/apis/project_api_tests.py index 9dc4a4053f..48e2251e7b 100644 --- a/seqr/views/apis/project_api_tests.py +++ b/seqr/views/apis/project_api_tests.py @@ -108,8 +108,7 @@ def test_create_and_delete_project(self, mock_airtable_logger): responses.GET, f"{self.AIRTABLE_TRACKING_URL}?fields[]=Status&pageSize=100&filterByFormula=AND({{AnVIL Project URL}}='/project/{project_guid}/project_page',OR(Status='Available in Seqr',Status='Loading',Status='Loading Requested'))", json=MOCK_RECORDS) - responses.add(responses.PATCH, f'{self.AIRTABLE_TRACKING_URL}/recH4SEO1CeoIlOiE', status=400) - responses.add(responses.PATCH, f'{self.AIRTABLE_TRACKING_URL}/recSgwrXNkmlIB5eM') + responses.add(responses.PATCH, self.AIRTABLE_TRACKING_URL, status=400) delete_project_url = reverse(delete_project_handler, args=[project_guid]) response = self.client.post(delete_project_url, content_type='application/json') self.assertEqual(response.status_code, 200) @@ -720,16 +719,15 @@ def test_create_and_delete_project(self, *args, 
**kwargs): ]) def _assert_expected_airtable_requests(self, mock_airtable_logger): - self.assertEqual(responses.calls[1].request.url, f'{self.AIRTABLE_TRACKING_URL}/recH4SEO1CeoIlOiE') + self.assertEqual(responses.calls[1].request.url, self.AIRTABLE_TRACKING_URL) self.assertEqual(responses.calls[1].request.method, 'PATCH') - self.assertDictEqual(json.loads(responses.calls[1].request.body), {'fields': {'Status': 'Project Deleted'}}) - - self.assertEqual(responses.calls[2].request.url, f'{self.AIRTABLE_TRACKING_URL}/recSgwrXNkmlIB5eM') - self.assertEqual(responses.calls[2].request.method, 'PATCH') - self.assertDictEqual(json.loads(responses.calls[2].request.body), {'fields': {'Status': 'Project Deleted'}}) + self.assertDictEqual(json.loads(responses.calls[1].request.body), {'records': [ + {'id': 'recH4SEO1CeoIlOiE', 'fields': {'Status': 'Project Deleted'}}, + {'id': 'recSgwrXNkmlIB5eM', 'fields': {'Status': 'Project Deleted'}}, + ]}) mock_airtable_logger.error.assert_called_with( - 'Airtable patch "AnVIL Seqr Loading Requests Tracking" error: 400 Client Error: Bad Request for url: http://testairtable/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking/recH4SEO1CeoIlOiE', + 'Airtable patch "AnVIL Seqr Loading Requests Tracking" error: 400 Client Error: Bad Request for url: http://testairtable/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking', self.pm_user, detail={ 'or_filters': {'Status': ['Loading', 'Loading Requested', 'Available in Seqr']}, 'and_filters': {'AnVIL Project URL': '/project/R0005_new_project/project_page'}, diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index d173d9a36e..77238dbb09 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -79,7 +79,6 @@ def _safe_bulk_update_records(self, update_type, record_type, records, error_det self._session.params = {} update = getattr(self._session, update_type) errors = [] - records = [] for i in range(0, 
len(records), MAX_UPDATE_RECORDS): try: response = update(f'{self._url}/{record_type}', json={'records': records[i:i + MAX_UPDATE_RECORDS]}) From 49abe372bab12dfefd63b82caab1c5e9053716fb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 21 Aug 2024 15:59:05 -0400 Subject: [PATCH 648/736] fix tests --- seqr/management/commands/check_for_new_samples_from_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 80c67f82f7..7903c6ccfd 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -108,7 +108,7 @@ def handle(self, *args, **options): updated_project_families = [] updated_families = set() split_project_pdos = {} - session = AirtableSession(user=None) + session = AirtableSession(user=None, no_auth=True) for project, sample_ids in samples_by_project.items(): project_sample_data = update_sample_data_by_project[project.id] notify_search_data_loaded( From b9a95e84e5f525e409eb9be0991acae6bdb5c2f0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 21 Aug 2024 16:07:23 -0400 Subject: [PATCH 649/736] only run pdo update for internal SNV_INDEL --- .../commands/check_for_new_samples_from_pipeline.py | 7 ++++--- seqr/utils/search/add_data_utils.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 7903c6ccfd..62f7e9991d 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -17,7 +17,7 @@ from seqr.views.utils.dataset_utils import match_and_update_search_samples from seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ 
get_saved_variants -from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL +from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, BASE_URL logger = logging.getLogger(__name__) @@ -111,14 +111,15 @@ def handle(self, *args, **options): session = AirtableSession(user=None, no_auth=True) for project, sample_ids in samples_by_project.items(): project_sample_data = update_sample_data_by_project[project.id] - notify_search_data_loaded( + is_internal = notify_search_data_loaded( project, dataset_type, sample_type, inactivated_sample_guids, updated_samples=project_sample_data['samples'], num_samples=len(sample_ids), ) project_families = project_sample_data['family_guids'] updated_families.update(project_families) updated_project_families.append((project.id, project.name, project.genome_version, project_families)) - split_project_pdos[project] = self._update_pdos(session, project, sample_ids) + if is_internal and dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: + split_project_pdos[project] = self._update_pdos(session, project, sample_ids) # Send failure notifications failed_family_samples = metadata.get('failed_family_samples', {}) diff --git a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py index ece4f55d48..09b0e67a93 100644 --- a/seqr/utils/search/add_data_utils.py +++ b/seqr/utils/search/add_data_utils.py @@ -90,3 +90,4 @@ def notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sa email=email, subject='New data available in seqr', ) + return is_internal From 99e62282df94a33703a3aad1637651d794b1fd21 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 21 Aug 2024 17:11:35 -0400 Subject: [PATCH 650/736] better error handdling/ test airtable fetch --- .../check_for_new_samples_from_pipeline.py | 13 +++- ...eck_for_new_samples_from_pipeline_tests.py | 77 +++++++++++++++++++ seqr/views/utils/airtable_utils.py | 5 +- 3 files changed, 89 insertions(+), 6 deletions(-) diff --git 
a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 62f7e9991d..e70ad1295b 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -119,7 +119,7 @@ def handle(self, *args, **options): updated_families.update(project_families) updated_project_families.append((project.id, project.name, project.genome_version, project_families)) if is_internal and dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: - split_project_pdos[project] = self._update_pdos(session, project, sample_ids) + split_project_pdos[project.name] = self._update_pdos(session, project.guid, sample_ids) # Send failure notifications failed_family_samples = metadata.get('failed_family_samples', {}) @@ -153,11 +153,11 @@ def handle(self, *args, **options): logger.info('DONE') @staticmethod - def _update_pdos(session, project, sample_ids): + def _update_pdos(session, project_guid, sample_ids): airtable_samples = session.fetch_records( 'Samples', fields=['CollaboratorSampleID', 'SeqrCollaboratorSampleID', 'PDOID'], or_filters={'PDOStatus': LOADABLE_PDO_STATUSES}, - and_filters={'SeqrProject': f'{BASE_URL}project/{project}/project_page'} + and_filters={'SeqrProject': f'{BASE_URL}project/{project_guid}/project_page'} ) pdo_ids = set() @@ -173,6 +173,9 @@ def _update_pdos(session, project, sample_ids): if pdo_ids: session.safe_patch_records_by_id('PDO', pdo_ids, {'PDOStatus': AVAILABLE_PDO_STATUS}) + skipped_pdo_samples = { + pdo_id: sample_ids for pdo_id, sample_ids in skipped_pdo_samples.items() if pdo_id in pdo_ids + } if not skipped_pdo_samples: return [] @@ -189,7 +192,9 @@ def _update_pdos(session, project, sample_ids): ]) pdo_id_map = {pdos_to_create[record['fields']['PDO']][0]: record['id'] for record in new_pdos} for pdo_id, sample_record_ids in skipped_pdo_samples.items(): - session.safe_patch_records_by_id('Samples', 
sample_record_ids, {'PDOID': [pdo_id_map[pdo_id]]}) + new_pdo_id = pdo_id_map.get(pdo_id) + if new_pdo_id: + session.safe_patch_records_by_id('Samples', sample_record_ids, {'PDOID': [new_pdo_id]}) return sorted(pdos_to_create.keys()) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index af2423c818..fbc0b80137 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -47,6 +47,72 @@ f'Test Reprocessed Project' \ f'

All the best,
The seqr team' +PDO_QUERY_FIELDS = '&'.join([f'fields[]={field}' for field in [ + 'PDO', 'PDOStatus', 'SeqrLoadingDate', 'GATKShortReadCallsetPath', 'SeqrProjectURL', 'TerraProjectURL', + 'SequencingProduct', 'PDOName', 'SequencingSubmissionDate', 'SequencingCompletionDate', 'CallsetRequestedDate', + 'CallsetCompletionDate', 'Project', 'Metrics Checked', 'gCNV_SV_CallsetPath', 'DRAGENShortReadCallsetPath', +]]) +AIRTABLE_SAMPLE_RECORDS = { + 'records': [ + { + 'id': 'rec2B6OGmQpAkQW3s', + 'fields': { + 'CollaboratorSampleID': 'NA19675_1', + 'PDOID': ['recW24C2CJW5lT64K'], + }, + }, + { + 'id': 'recfMYDEZpPtzAIeV', + 'fields': { + 'CollaboratorSampleID': 'NA19678', + 'PDOID': ['recW24C2CJW5lT64K'], + }, + }, + { + 'id': 'rec2B67GmXpAkQW8z', + 'fields': { + 'CollaboratorSampleID': 'NA19679', + 'PDOID': ['rec2Nkg10N1KssPc3'], + }, + }, + { + 'id': 'rec2Nkg10N1KssPc3', + 'fields': { + 'SeqrCollaboratorSampleID': 'HG00731', + 'CollaboratorSampleID': 'VCGS_FAM203_621_D2', + 'PDOID': ['recW24C2CJW5lT64K'], + }, + }, + { + 'id': 'recrbZh9Hn1UFtMi2', + 'fields': { + 'SeqrCollaboratorSampleID': 'NA20888', + 'CollaboratorSampleID': 'NA20888_D1', + 'PDOID': ['recW24C2CJW5lT64K'], + }, + }, + { + 'id': 'rec2Nkg1fKssJc7', + 'fields': { + 'CollaboratorSampleID': 'NA20889', + 'PDOID': ['rec0RWBVfDVbtlBSL'], + }, + }, +]} +AIRTABLE_PDO_RECORDS = { + 'records': [ + { + 'id': 'recW24C2CJW5lT64K', + 'fields': { + 'PDO': 'PDO-1234', + 'SeqrProjectURL': 'https://test-seqr.org/project/R0003_test/project_page', + 'PDOStatus': 'Methods (Loading)', + 'PDOName': 'RGP_WGS_12', + } + }, + ] +} + @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HAIL_HOST) @mock.patch('seqr.models.random.randint', lambda *args: GUID_ID) @@ -123,6 +189,7 @@ def _test_success(self, path, metadata, dataset_type, sample_guids, reload_calls ) @mock.patch('seqr.management.commands.check_for_new_samples_from_pipeline.MAX_LOOKUP_VARIANTS', 1) + 
@mock.patch('seqr.management.commands.check_for_new_samples_from_pipeline.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.views.utils.airtable_utils.logger') @mock.patch('seqr.utils.communication_utils.EmailMultiAlternatives') @responses.activate @@ -131,6 +198,16 @@ def test_command(self, mock_email, mock_airtable_utils): responses.GET, "http://testairtable/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking?fields[]=Status&pageSize=2&filterByFormula=AND({AnVIL Project URL}='https://seqr.broadinstitute.org/project/R0004_non_analyst_project/project_page',OR(Status='Loading',Status='Loading Requested'))", json={'records': [{'id': 'rec12345', 'fields': {}}, {'id': 'rec67890', 'fields': {}}]}) + responses.add( + responses.GET, + "http://testairtable/app3Y97xtbbaOopVR/Samples?fields[]=CollaboratorSampleID&fields[]=SeqrCollaboratorSampleID&fields[]=PDOID&pageSize=100&filterByFormula=AND({SeqrProject}='https://test-seqr.org/project/R0003_test/project_page',OR(PDOStatus='Methods (Loading)',PDOStatus='On hold for phenotips, but ready to load'))", + json=AIRTABLE_SAMPLE_RECORDS) + responses.add( + responses.GET, + f"http://testairtable/app3Y97xtbbaOopVR/PDO?{PDO_QUERY_FIELDS}&pageSize=100&filterByFormula=OR(RECORD_ID()='recW24C2CJW5lT64K')", + json=AIRTABLE_PDO_RECORDS) + # TODO patch PDOs, create PDOs, patch samples + # TODO test paging for patch (MAX_UPDATE_RECORDS) responses.add(responses.POST, f'{MOCK_HAIL_HOST}:5000/search', status=200, json={ 'results': [{'variantId': '1-248367227-TC-T', 'familyGuids': ['F000014_14'], 'updated_field': 'updated_value'}], 'total': 1, diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index 77238dbb09..5b8fcd2821 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -79,11 +79,12 @@ def _safe_bulk_update_records(self, update_type, record_type, records, error_det self._session.params = {} update = getattr(self._session, update_type) errors = [] + 
updated_records = [] for i in range(0, len(records), MAX_UPDATE_RECORDS): try: response = update(f'{self._url}/{record_type}', json={'records': records[i:i + MAX_UPDATE_RECORDS]}) response.raise_for_status() - records += response.json()['records'] + updated_records += response.json()['records'] except Exception as e: errors.append(str(e)) @@ -92,7 +93,7 @@ def _safe_bulk_update_records(self, update_type, record_type, records, error_det f'Airtable {update_type} "{record_type}" error: {";".join(errors)}', self._user, detail=error_detail, ) - return records + return updated_records def fetch_records(self, record_type, fields, or_filters, and_filters=None, page_size=PAGE_SIZE): self._session.params.update({'fields[]': fields, 'pageSize': page_size}) From f0a30e2dadcd7d398c5be88122a137b16b4547aa Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:37:25 +1000 Subject: [PATCH 651/736] Revert "Update s3/igv.org.genomes URLs to gs/cpg-common-main/references (#237)" (#238) This reverts commit ea237b94ec4e7c5fe5a1dec17195d1c19fb70bd8. 
--- .../components/panel/family/constants.js | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/ui/shared/components/panel/family/constants.js b/ui/shared/components/panel/family/constants.js index 1907ecb248..a5b870f9de 100644 --- a/ui/shared/components/panel/family/constants.js +++ b/ui/shared/components/panel/family/constants.js @@ -80,8 +80,6 @@ export const IGV_OPTIONS = { const BASE_REFERENCE_URL = '/api/igv_genomes' const REFERENCE_URLS = [ - // Accessing some reference files from the s3/igv.org.genomes URLs has caused issues - // So we access them via CPG cloud storage instead (cytoBand, alias, Refseq) { key: 'fastaURL', baseUrl: BASE_REFERENCE_URL, @@ -92,18 +90,18 @@ const REFERENCE_URLS = [ }, { key: 'cytobandURL', - baseUrl: `${BASE_REFERENCE_URL}`, + baseUrl: `${BASE_REFERENCE_URL}/s3`, path: { - 37: 's3/igv.broadinstitute.org/genomes/seq/hg19/cytoBand.txt', - 38: 'gs/cpg-common-main/references/igv_org_genomes/hg38/annotations/cytoBandIdeo.txt.gz', + 37: 'igv.broadinstitute.org/genomes/seq/hg19/cytoBand.txt', + 38: 'igv.org.genomes/hg38/annotations/cytoBandIdeo.txt.gz', }, }, { key: 'aliasURL', - baseUrl: `${BASE_REFERENCE_URL}`, + baseUrl: `${BASE_REFERENCE_URL}/s3/igv.org.genomes`, path: { - 37: 's3/igv.org.genomes/hg19/hg19_alias.tab', - 38: 'gs/cpg-common-main/references/igv_org_genomes/hg38/hg38_alias.tab', + 37: 'hg19/hg19_alias.tab', + 38: 'hg38/hg38_alias.tab', }, }, ] @@ -123,10 +121,10 @@ const REFERENCE_TRACKS = [ { name: 'Refseq', indexPostfix: 'tbi', - baseUrl: `${BASE_REFERENCE_URL}`, + baseUrl: `${BASE_REFERENCE_URL}/s3/igv.org.genomes`, path: { - 37: 's3/igv.org.genomes/hg19/refGene.sorted.txt.gz', - 38: 'gs/cpg-common-main/references/igv_org_genomes/hg38/refGene.sorted.txt.gz', + 37: 'hg19/refGene.sorted.txt.gz', + 38: 'hg38/refGene.sorted.txt.gz', }, format: 'refgene', visibilityWindow: -1, From 2df3f346d68217ce3b6c7d00a3f1663a704f922b Mon Sep 17 00:00:00 2001 From: EddieLF 
<34049565+EddieLF@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:53:46 +1000 Subject: [PATCH 652/736] Adds user agent to request to igv content (#239) --- seqr/views/apis/igv_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index eebcbb248a..ddc038f514 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -273,6 +273,8 @@ def igv_genomes_proxy(request, cloud_host, file_path): if range_header: headers['Range'] = range_header + headers['User-Agent'] = request.META.get('HTTP_USER_AGENT', 'Mozilla/5.0') + genome_response = requests.get(f'{CLOUD_STORAGE_URLS[cloud_host]}/{file_path}', headers=headers) proxy_response = HttpResponse( content=genome_response.content, From fe80d6f5cb606e2eef62a790348db81fceb1d990 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 22 Aug 2024 14:26:38 -0400 Subject: [PATCH 653/736] add callbacks --- .../tests/check_for_new_samples_from_pipeline_tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index fbc0b80137..3079e54c0b 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -206,8 +206,14 @@ def test_command(self, mock_email, mock_airtable_utils): responses.GET, f"http://testairtable/app3Y97xtbbaOopVR/PDO?{PDO_QUERY_FIELDS}&pageSize=100&filterByFormula=OR(RECORD_ID()='recW24C2CJW5lT64K')", json=AIRTABLE_PDO_RECORDS) - # TODO patch PDOs, create PDOs, patch samples + responses.add(responses.PATCH, 'http://testairtable/app3Y97xtbbaOopVR/Samples', json=AIRTABLE_SAMPLE_RECORDS) + responses.add(responses.PATCH, 'http://testairtable/app3Y97xtbbaOopVR/PDO', json=AIRTABLE_PDO_RECORDS) + responses.add_callback(responses.POST, 'http://testairtable/app3Y97xtbbaOopVR/PDO', callback=lambda 
request: ( + 200, {}, json.dumps({'records': [{'id': f'rec{i}', **r} for i, r in enumerate(json.loads(request.body)['records'])]}) + )) + # TODO test actual calls patch/post # TODO test paging for patch (MAX_UPDATE_RECORDS) + # TODO test with an error in patch? responses.add(responses.POST, f'{MOCK_HAIL_HOST}:5000/search', status=200, json={ 'results': [{'variantId': '1-248367227-TC-T', 'familyGuids': ['F000014_14'], 'updated_field': 'updated_value'}], 'total': 1, From ff388bf745d61f916fac4ab449621fe32277c265 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 22 Aug 2024 15:19:13 -0400 Subject: [PATCH 654/736] variant lookup table has sample_type (#4289) * variant lookup table has sample_type * project_sample_types * try hail * switch sample_type and family_guid in project_samples dict" " --- .../SNV_INDEL/lookup.ht/.README.txt.crc | Bin 12 -> 12 bytes .../SNV_INDEL/lookup.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../GRCh37/SNV_INDEL/lookup.ht/README.txt | 2 +- .../lookup.ht/globals/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../lookup.ht/globals/metadata.json.gz | Bin 303 -> 332 bytes .../lookup.ht/globals/parts/.part-0.crc | Bin 12 -> 12 bytes .../SNV_INDEL/lookup.ht/globals/parts/part-0 | Bin 63 -> 69 bytes .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../SNV_INDEL/lookup.ht/metadata.json.gz | Bin 343 -> 356 bytes .../lookup.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../SNV_INDEL/lookup.ht/rows/metadata.json.gz | Bin 606 -> 606 bytes ...-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc} | Bin ...rt-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818} | Bin .../GRCh38/MITO/lookup.ht/.README.txt.crc | Bin 12 -> 12 bytes .../MITO/lookup.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../fixtures/GRCh38/MITO/lookup.ht/README.txt | 2 +- .../lookup.ht/globals/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../MITO/lookup.ht/globals/metadata.json.gz | Bin 335 -> 368 bytes .../MITO/lookup.ht/globals/parts/.part-0.crc | Bin 16 
-> 16 bytes .../MITO/lookup.ht/globals/parts/part-0 | Bin 631 -> 636 bytes .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../GRCh38/MITO/lookup.ht/metadata.json.gz | Bin 372 -> 385 bytes .../MITO/lookup.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../MITO/lookup.ht/rows/metadata.json.gz | Bin 625 -> 624 bytes ...-87a9f074-c787-4edc-81ce-94ba0daffd80.crc} | Bin ...rt-0-87a9f074-c787-4edc-81ce-94ba0daffd80} | Bin .../SNV_INDEL/lookup.ht/.README.txt.crc | Bin 12 -> 12 bytes .../SNV_INDEL/lookup.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../GRCh38/SNV_INDEL/lookup.ht/README.txt | 2 +- .../lookup.ht/globals/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../lookup.ht/globals/metadata.json.gz | Bin 335 -> 368 bytes .../lookup.ht/globals/parts/.part-0.crc | Bin 52 -> 52 bytes .../SNV_INDEL/lookup.ht/globals/parts/part-0 | Bin 5564 -> 5585 bytes .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 110 -> 0 bytes .../metadata.json.gz | Bin 184 -> 0 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 111 bytes .../metadata.json.gz | Bin 0 -> 184 bytes .../SNV_INDEL/lookup.ht/metadata.json.gz | Bin 365 -> 381 bytes .../lookup.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../SNV_INDEL/lookup.ht/rows/metadata.json.gz | Bin 619 -> 621 bytes ...0-38581d1a-27f8-452f-9678-75225dfc64ab.crc | Bin 12 -> 0 bytes ...0-7a236e4f-7c20-4944-b7d8-071d2b10a630.crc | Bin 0 -> 12 bytes ...art-0-38581d1a-27f8-452f-9678-75225dfc64ab | Bin 118 -> 0 bytes ...art-0-7a236e4f-7c20-4944-b7d8-071d2b10a630 | Bin 0 -> 123 bytes hail_search/queries/base.py | 30 ++++++------------ hail_search/queries/mito.py | 29 +++++++---------- 56 files changed, 25 insertions(+), 40 deletions(-) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx => 
part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx}/index (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx}/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/{.part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.crc => .part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc} (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6 => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818} (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/index/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/index/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/index/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx}/index (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/index/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx}/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/{.part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.crc => .part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.crc} (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80} 
(100%) delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/index delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-38581d1a-27f8-452f-9678-75225dfc64ab.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-38581d1a-27f8-452f-9678-75225dfc64ab create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc index 47a747d1a8b78660846186993c269f7c6fb86b70..b8eaa2d478e189846de5e8165c6d446d224fa9af 100644 GIT binary patch literal 12 TcmYc;N@ieSU}8AI)a?ua5&r_y literal 12 TcmYc;N@ieSU}Cs`=}s5`6@UaD diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc 
b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc index 6acf89fa39eb967dedb21da0678eb66654ba42e2..8d6507b0100c50e8d6ee30420d567d9a808e7ef5 100644 GIT binary patch literal 12 TcmYc;N@ieSU}DI;?y3v`5>^7^ literal 12 TcmYc;N@ieSU}DgYn#~3P5OxA# diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt index 0552dbf36b..5daea17753 100644 --- a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt +++ b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. Written with version 0.2.128-eead8100a1c1 - Created at 2024/04/03 17:08:32 \ No newline at end of file + Created at 2024/08/16 15:39:04 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/.metadata.json.gz.crc index 0be207028cc9e4a24fd5ef5a75da9f454d17c0b3..6650687d8b1d7a0980e3d48af24b54da9c0e5986 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7k)u-^y(5#s|E literal 12 TcmYc;N@ieSU}Bi_;_4y*6!Zi( diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/metadata.json.gz index e132519e5c2492bd8bd5d64d2d4c690a1d619ac8..95261c27158ff01413ca0f3071e37dd36a018226 100644 GIT binary patch literal 332 zcmV-S0ki%eiwFP!000000F_c(Yr-%T{x5ko19d2)Z|m%y1_LX5SScElql>0#Nm8LJ z{qH-m^*&+iOV0T&=hCDmnq@!&@@@ne5YvxITmT;uO6FUrNRXr$@mv7+I0@>5VOA9& zUXBE+z(dINqL5jLohiAi3zKhvyZMS`Qc18uFM2kHRmp`4TLtI&qQAggYGx|B7#!n< z?wBee=t^{G+NAW*#*R61G_=)5$nU&ZgL5b9B+8!xwJ9XKe zd0NeH_O|=7T&=)d{pYtR91Ner6GaLCl1iLOLY4C^mbnluJs8Vx6pTls$MLQ0(>k%Y z{?msu1IwD` literal 303 zcmV+~0nq**iwFP!000000F_b6PQx$|{TE*wBq(Zyd?PfRkdPY0fhww8dlK9_PUNLh zi1P1@n=T~n1)s)mo3W=Qnw3B(c{c(Kh~?)Zt$|NTMAkcK2wi1Z@k#&>IHB#y@X^#D zUd{xnAVA1Wl9*XYo$=hYwW+qi-Ma8nSUO3bjbY6yiKS+y$)XdTvr;Mv7sVh1C!t46 
zv}ei#9p!Xc7be&rC%DTN9006^J Bhu8oB diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc index f8364d7499a7a33cfd4e893ecbdd12bef28cb879..905a30feecaf6f082dfeb576d78198faa8bee189 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7-2V166`5%mK- literal 12 TcmYc;N@ieSU}9LYWlsbE6bu8a diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 index cbcaed4d4c5edb1b782c4914dfbef6f09d9e337b..05ec205c54a6ecbeb75db969956d48149ca772d3 100644 GIT binary patch literal 69 zcmXqDU|=u+VvVi(e-#X585wRcGBPl51{oL_7{(iBr!$AU1~W2nx)~V78!<33IGjDv Pq{a(W4+3C43_#@o#7_+y literal 63 zcmdO5U|>)KVvVi(e-+f;GBE65WMtqBGB7YOj5o|qcLOpE42AU0HG=j#sB~S diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/.index.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc diff --git 
a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/index rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz index 5284af4b0bb01cff4ac7e3d2d47d6f5b15044abc..9479e06e8ac7461ed9a7d2a0cd0c5d10a481e976 100644 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000F9DSYr-%Th5t*QHps+Qn3A`lFvj*URQ6Iz$>P;!OOue? 
zPKxxuFRhJI*~4Di=G^a`d%5X}?4^R@2b`Bi6ZSZtr3*?*F4a{e(@8o>(!~P+-z}2q zlqVt~lsMRf11-Q84pd<|mI1!10YUy5;<2$1D%V0jE9AoW5bWvdk!(5)dXq=_A1iV#DHCEa%V;goROl6G* zjh?U*Ij^h#)V%7}xc*ozBeJm|`kT0{Kk{u*&GG-f9u6kU9ZC%MUIlyDIT&zRatk5UU%%_}spBP?ta+BT#O{{9c@t?e(`_b7O}0ssK= C8m#01 literal 343 zcmV-d0jT~TiwFP!000000F9DSOT#b}h5t*Q6yjJra3gQBfrviHCccbPVtThm(48!f#6F?&-w02a@v+0q=Nh#oR>xucE4PtD@sZ()kP%pSvpJ7)g1sYR>^$M z6Oj-~92~%b7T|LSDz_ZV0AJLAApZ#Q*jNb8TmRA?mHFU`?ag{|GatJuFJ*p zHgbRL>XBpg&O{#-E|_~>*?#m%<07Rc;I(3^snUfrcA*eqDr+=o^n|tKw~Jw^dDVBc zec!Ajva=wDn$O)vz6+{3{eNzN!;s|;C5Dfh3saA07z19TwH}=7u*mO|tj}@ALf~PN pa2CgzDOE_1QVUegGc-^T7TSca3u)v0@OuoS-49vHLpqlN002-HrD6a8 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc index 6d4bcebe5ae8e4504c9eba7bbe28df0adf27fc20..dcbbf269b1a067d9ce763291a813428aeb890b5c 100644 GIT binary patch literal 16 XcmYc;N@ieSU}EU{lgX%mc>7rZC0hld literal 16 XcmYc;N@ieSU}BJYd!t7nW`8gMB)A18 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz index 4cc2e1674a4e4c5988726f5d4e43824a9992f57e..5e76d5dbbaef5c6bee2b7bbe5fbd6d340eaf98c7 100644 GIT binary patch literal 606 zcmV-k0-^mMiwFP!000000Ns|~Z`v>r$Nv{UZEDgHem8HWV--?0O)%|Y6^e{~2DibE z95a=n{O>&{C6I(lqgr3eLqzV*ci*r5k>VXubPXgV6UE^P`1IG;Spuw0JQ6*^mW1~t z)@+mFJB?$AAtiH6G0lOwLK5!w9e!>T;5TIgtH6aYb1@##6lT_JnBC>=!b*r~W#!Vv zq@rf)B|4Q>YK4)BvYmG3z0AhIwJ~&izb~D8iO~2`Xv>I^=YQVRiCDAlqqv&fYdb#YB;O<@&Ti;WOq zYUgn`9tG{;=#cUVTgjLom5k89BkHRZEnILW|6)PE*X#6~%YLVp=S$CAhgN~E>nk(& zLle>wSo1L1$8sPI#6PHAVf^y)OJ44xF;J0X_fISy10keIZ6x_qam@X$hsakTJJez( zd`FHmPYaQPEtOEEG_q2xKAMqbCrLb5IWkfK%o#Wr+fr3SQ%%hW0nma`H2#jvhX0GNHZAG+OdxlqV--?0O)%|Z6^e{~0&as7 zIc6$D`R_e;O2`M3Mz!9Qi-??ae9rsYj}#w>q+1{XnMn>$z~?`|EK*=S;*jtW_9VC` zal&>PzSB5{7&5ZN6iYZTQ%HitvBQsj3jC%_U=^4UWDOOS*u+X 
zfohz`-E`9JjV7m*L)b>dyizhj1COY$RsUBN0Q`IP5lvMdZz)E5;~biQ?x)UC3d%12y*A&YQlQt}iF`oRz;Ad5`eo zroHmpWcT2-JM}I(G0jt|^jBd%{&*iSFK)*j10Q-mqwZ+j8@q#!H*!a#6?40*0q-yh zeV=*uuJACYat)JYC%IXFAovYb+88Tt7Wk)N7^nKEyjcgkNyDj06Q2SCjbBd diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6 b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6 rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/MITO/lookup.ht/.README.txt.crc index 6def5d8db35cd6b61b41cd042d3cd17889d7b617..0ab5a311c58c535923eb7f702fe6a71268230d64 100644 GIT binary patch literal 12 TcmYc;N@ieSU}A`_meT?N5W@ml literal 12 TcmYc;N@ieSU}6Y diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/lookup.ht/globals/metadata.json.gz index 1ed50398d6345b1b042d17949f5c9f21c96f277f..9d007df8c1a1365f0219d2947ba93908da56aa52 100644 GIT binary patch literal 368 zcmV-$0gwJ4iwFP!000000F_e7Zo)7S{TE+xC1?eza%)O@s#J-f9vVep?137bMP887 zDF5Cu31MlG+Fas!^WK}qu1U@^AOU&P0yK!(`!p_r-xEp}JE%yIqasx*T!CvQojq;yfqIzx0X(g`mnX|Y>#wBEm!ZM$B%^&0fmEx$(L 
z=>9Q0r6}Q_Qi(H4s0zMAn+wv)gR%TV!el&tnEaG|7%L9af9kMjVA-{@@gELRGHqX^ z0)sD?_~33x{0rXBSiXUfRp23HCUHa!Bvw0St3u~HV0N83!A&@cpS7mtDvpGrx=e#Jy`={s1$2r*2sQ~@ zOK_ypK{(2()4&y++P?xAmC}GkvQ6pNx}bGh`#Fy795*Lu!jTdxyHS53m~Gi$Angh! za8mSZwT3~d25T#A+;$6k^a5{j^e}yjIw~OSTS(rML^5Z4j2VbF9!$g!3g)xf+9o9wqDkEy4OP^o1wZ4jF!YvVwD=2)< z2i;~#D=lJA^eciIYZM5IA`0t?QF?V#A_tP%^NRwfN(BPPiw4G-mI#1EED;s}F=P4a zZWjQ7L?Qt!;mK+Z_aoJNbo`NGee9UxfH*pGZj2|^g zwfKL=^{{pmSzpHT=H{?=2C>RFy`7)2KJ>ua4N&>oiN44e!`cbJ2&=VNJAw=!!`{mO zzU%CfK3?pnX(xdYvQ*!VUYe2$kQlQ+1YZL$2tf@`lCTv~~#;o{yvH z_GnwE;ZQ@6f@mZKNh!k`utEut$Tku{)D?vxga{|X|9|%bna>^6((3?GQ-F`ZXipLz(IN8dS@>c4oQ5%- zpG9-p^?*A!(VMOL7^R!I7aoqMvVVcIoS)Sn#@nkl&F!Rog>w(|wfUt0P<63aKgOu0 WI28>500000001bpFa00@0RRB$)IXvC literal 631 zcmV--0*L)$0ssJ?4*&otwJ-f(pAMxG0CwMBCGe2w0qRgmp-L-)e9+q=q^}0Y8b*hG zOW3B?6R*~o0C;AmZjWr1bMUjN{#3a4CC6NP3?fE5v(`4d+BLxHFOhZJ=^3~l= z03aYB(v%cSc(U4zpJ`%!77C<(^jaVNT5ZHc^1EL@zU1>&ng9C3`?B)2z^cvlQSa5X zJ7^cTcx*MOuWtVDhz%hQ*9M$6g z8P~(wJ!E|u%bT0S+CgHKZ+bgFV||Fi+7VFs+KIl%7sJ{OzzD0gSi6D@zrxU2RlypcK+sj9!I=r*l{ z*j|XWb`Uw|`~7- zJ%al%eOtVRe^Yybez)_gnTRy6-Xm>H{{Ky#ro<2E?{(eGl}sNLAP5U`Fix7tx{0m@ zlDl4nI=um;ZX=ZVEwdiddD5T5Gbqb>^O2e4W&!UQj>jNUU zt2aZ_8?{Z`c3vJ&m4AV=+^5ywj0>w)+Uuk|!g&sKwfUm}sOn;ievCWCOV!_v9}NHi R0000004TLD{U87V002*)IphEU diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/.index.crc b/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/.index.crc rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/.metadata.json.gz.crc 
b/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/index b/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/index rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/index diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/lookup.ht/metadata.json.gz index 67e671d964a463e2e136a0c45dd9399e6ed354a4..60381e4539e1d710e062e3f4ca942425251a64f2 100644 GIT binary patch literal 385 zcmV-{0e=1;iwFP!000000F6>hYr`-M{x3dl;FUgLE53~u#@G&{ExUveJV$N3IkB;o zl~D5Er=(6xOAou`@x3HVKCrDeko|%W+Ez?FKFy|c&I+l`8D!(pbTpaH9{^-NpNz+H zq9%+p4_ok1DF~SdlR1e6iO(oWP<~N7HV%l{bU%lkv0u`NHTqiN!Fl@)3bU;&wb2m7 zQY%b0wKI^AL{~e0e5L%^d)Y)VO?mBdiDXypWs@dP0LNUFrfsk@<_rno-M!a{=yO-6Xn9c78ow(@vTpA?~+)2?z(d=Sh 
zLA+KtoALq>ymdx~QmbpumbaOfx^8Lu%9hSnP+?%=z>b6OKPy>w3t4?G7lUN&K=nI4 zEppJegxdZ8eZ81WPI}ZxHz-rKjy=SHkAYhB%yrnHPiASCL+1z5!$ragjGjwXt^?{y fp=sWrft(4NFSISCMSp)6du#g#whm(91Oos7xm~m^ literal 372 zcmV-)0gL`0iwFP!000000F9DSOT#b}h5t*Q6yjK0(2_UVKtvy86JJItS$eld(4Wkq2Nns?)lEi?M*telM3=ruukflu-m&NPAMt5RHq`D&*E7Wr#Aq+NTd0j zMGj0@(XB&{lXfKfs4$q^@yhhp#~PEEmVjWjQQTEh zTxz=dUGZ7weHg9KmC-fS=m_h`_f&_^npgd_S8vP3gscsS;lhr$H1R%E_2K`%0S+d^ zElLc}BNw_Edl&;=CTTG!mthmXNwPjC{67R9&JvEr(Q{4}(xI#cs`d$5C1VD(=XBVk?P0ssKsY_iS( diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/.metadata.json.gz.crc index 3e5f0b7d529059a734d9c40bb99cebe504e824c7..498d4f224fc7672022f5d7f74d802686d3a32f02 100644 GIT binary patch literal 16 XcmYc;N@ieSU}6Yb>Nol5f_ZlVB|io8 literal 16 XcmYc;N@ieSU}E4dmk2ytzk?qD9$ExB diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/metadata.json.gz index cf47dc3361f40853639eba114c4e30e40d2dcf38..b54d201997931940d1ec0f4bb3824945fdbbe0a1 100644 GIT binary patch delta 611 zcmV-p0-XKv1n>lq7=Q62Eg^4(u?nf1CaCtX3Pr}gfZO0ij+x4k|M#Ag5|RK@snnP9 z0Oam`_x(DbDBcoDS3m+XmK>gdPkw)!rocMHA>kuzNN`W$gsn5Yrg020WMqaZmT+LU zkObRZhr3M*d{`#13Ty~7Uq%C(!Bnd?)7!jVPz$j{YO$~}A%CgbIBAjzEfR^5vDDp; zab9L);94oVx!;w}oKz(bz;u{ts;l|pw>m{Z6lQdl+CD{JX9*fqlEyS!33iZ7Rwq(s zHZ#`W-xq&1XSeqI)6HbK?E0Te;e{M56mroNUF6D&rq=&SJ*~7RbiR%vP>uU_I~ue* z{$QVS2wRGnSAR+dXz39Z*NWygI2He};Og?Sdv&<%Po{ak^vrc=9_XsM(%2805SGw} zhw(0!1FInZMfDQn7Z+diavP0-Mmd(RVzo+4BvhtD!Ji5cuRI4Ii5i6aTFeCR$hmQn zD9S*WN~n|x(o?Li%rNFCn|mu2BQ@l#fpbAFRUK$*sDJq&0OBJkY5Wtpc^gpqJO%PN zLOHQvBw`2~H~SF8BJ%Fg6=M{zMDgRI&g8J%ff{><&YQlQuFog+n3WSoJ|euhX>WX6 z<$JK-oq88TZ1a>V{ax6@0iGA^h1+ud9_>f1p69Zj-*Y|S%=Oz0+`c!bEl#6|`>p&e z@i3us0bGM*ExBF4CHPiU+8QfP9{8tV7^kwO`hjwG43og~I(@T0o8b5x%wH3{;RGaq xWtX5o3)|t(6^Q%rlhck`Pu2006pHEZ_hD delta 612 zcmV-q0-OEt1n~rr7=Mi<{Cq2nRY=`5LA8ffC^Gg1+y*Cd%v6T_zwexskOY`YrM{F0 
zAb01x@7MW6@s3Ek1~MQM$>9n3^w-x}3d|!82_Iog2KOXR*e1hk8pjYrM&_7e2?u5i z$zZqdaJNl?kIDpAfem5i(|AZTm}#|PdY88w)IuzgS}bi$NPnufPMTywi$r2%BDL=s z=Xo{;u9c$O`+e!$NmcRyOoy4Kx|%P3t5Xz2VMf=f?Nju1R-i#8X-u=VU`NSheI{k* zb7TGedG=Rxc5A;s+)PI+zxS~eUdX{hA(u_jMXs!9YW<(o^Ga(%7n>*o)wo}`<6+C| z4G$@Yu$72;rGI3ImL5@Yt!QC`Gw~M-I_z~#;W{)AblqHO><3K$R${8c?5nkN1SH7+C zJvi)6y^A5Xc}kW3ChXw=&kOd#{o*cs+6z41aXseyZm{q@x8LdoZnxiUvmogAX={<6 zB_5_!E?r@mY$UhqcLd*xN?T*a$pils4C7R`Qr}U|o?$Ww0NMgZXQMH=Kav yFYM9^x;?+%<_-%+c^e^Fyy3`ot_p2K85`RPHe#$1s<}NNF_P| diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/.part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.crc b/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/.part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/.part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.crc rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/.part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.crc diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b b/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80 similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/.README.txt.crc index ac18ce1aac64a0e2ac8acce3465c69bde8cae411..92285ec48d482bce1bb9179da95e403691dddb3b 100644 GIT binary patch literal 12 TcmYc;N@ieSU}BgO+`9n)5|aa8 literal 12 TcmYc;N@ieSU}Crwacm9%6Ziwi diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/.metadata.json.gz.crc index 
a59974a85c71072216238679c45d6826f03e0236..fc9edcf61dba073e4468802289cf9679a69c55ad 100644 GIT binary patch literal 12 TcmYc;N@ieSU}E^uIVBJP6m$cj literal 12 TcmYc;N@ieSU}Er0{nP>g5<3H5 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/README.txt index f201e7b745..5b8a155f43 100644 --- a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/README.txt +++ b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. Written with version 0.2.128-eead8100a1c1 - Created at 2024/04/03 17:00:55 \ No newline at end of file + Created at 2024/08/16 15:40:56 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/globals/.metadata.json.gz.crc index d2b845640ae4b4a47caa8deedfec45f223ccee8b..06fd46b58c8cabd7e3ea6aa6a6acb3eb4a216d6f 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7k7)bs}c5VQhl literal 12 TcmYc;N@ieSU}AW%X7y$O6%7O> diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/globals/metadata.json.gz index 1ed50398d6345b1b042d17949f5c9f21c96f277f..9d007df8c1a1365f0219d2947ba93908da56aa52 100644 GIT binary patch literal 368 zcmV-$0gwJ4iwFP!000000F_e7Zo)7S{TE+xC1?eza%)O@s#J-f9vVep?137bMP887 zDF5Cu31MlG+Fas!^WK}qu1U@^AOU&P0yK!(`!p_r-xEp}JE%yIqasx*T!CvQojq;yfqIzx0X(g`mnX|Y>#wBEm!ZM$B%^&0fmEx$(L z=>9Q0r6}Q_Qi(H4s0zMAn+wv)gR%TV!el&tnEaG|7%L9af9kMjVA-{@@gELRGHqX^ z0)sD?_~33x{0rXBSiXUfRp23HCUHa!Bvw0St3u~HV0N83!A&@cpS7mtDvpGrx=e#Jy`={s1$2r*2sQ~@ zOK_ypK{(2()4&y++P?xAmC}GkvQ6pNx}bGh`#Fy795*Lu!jTdxyHS53m~Gi$Angh! 
za8mSZwT3~d25T#A+;$6k^a5{j^e}yjIw~OSTS(rML^5Z4j2VbF9!$g!3g)xfHq)$ literal 52 zcmV-40L%Yla$^7h00IEL&Oz|Hr-fv3(*uLdj?;4-ySVT-%GaqL2U6>pmtH(r`z>Q1IH%T&PS+;_5ObZX&pW?+DLICr6a()x`~vG}H% zjd4mUR;tC^ySzI9W^MgR!8C*#k(8ZqR83W6m~ehu?Ve0Wg42=q|4{ zxclFGP9aQFWOr=OX+?z=d^hvwbfW62k?W4mX@y*;jrA0rQ;JA4)pyJ}t#BqCF;G>0 zG3WGwZLGAGb1GrURV#OSu=AQzi(D^o?4Gz9Ix@M(jiu@)auPbNU=li|h%}a}vu%VO z>k>M3=m=CH?Cj~F(~6Bcai%K0(?Aa!bV~6WbV^}Dl+S5}Ms=pwQFC?kD?XP(SndwWhToc+{E^XF7T!C1+K z%7B1S3bKJJ3LZ?tK73=&w1nX?fKZhq5C7Dx@02b03L!ZD{8M!?9NIu@Lgzt6O*o!tEGF{cuINxsSWnMBJw$DBs+$9`jppm&Wqg}{E& zkrUa^`1vOBJb02K&9B9rPFPDKojH9VC99E{Oe#^Tc;8f(S*PGzPu=z0avK~W;gk|G zjbR{*@zuyEuS=@vs#BMlPC;qiR`;Hi?8Rjxo2%Yi871vg)O^w@Cc>uVT^2Xl+jt@u zL}wS6{FUInuf6)gw${50enWux1^KGhL9>(Z&f@|!x1~(&` zJ_7>cp}0Ez#pOxJl#Ow!*s4>@E>p>cS!XW;wvic`9qk-h$0TF^ z)7{Tg?LXj?Zei`o##mLSOSPVJieW6HrgL?9t4aGpk%;Ru5Q(@rM^lbKO*{6TIeQ`( zj0hp(exCE~D{V~9geU)tuFeGo7ZgyS9sAf*M4BSe-yHpUQYfW$^ndkpw(dN3I9Rb0|Ou$UhhBkH=P8gS(}K(P;#Xm0Y+AnQYN6(JhK`ilW$@ zPQB-tip-r)QJ~~8EZyIDZ{*uNlhLU})b(cY85kfwLx&1}V@vnJ62|L0UuZSV_3p?NMF(yMrqTZvW%U|D$uF6Fs9;U-eYU2 zt0VUA2s}@O`8T7}397}KslP1CKh<9SfD*=c*J8}FD5gNEWR*_E3awJE(%hUFyO}(T z(WykeI?{ODM2|ZqtBhOjmQzb*Qo*Y@WtG^}oNGP!5YRB1T(P6!J3ul#1d@P}l+me#ITv4+r7lakOg(rN)uQ4QbdOO``kQaL1Ax&r zrrKP<=+9Nw8hlD)6l{y47RxxrFjo{~;{xYX2q2+T2#l3nFjn%@zSE=G2%}xT)>G8- zDb<{dQ+6&(wS_5{adSRclI!o?yvyix!ZcM#N6bH885xj=a#wQ?TKJk&vaYF^2%UP) zUBS01i*0VW*qN#n;Rjnb*+M}E1_+CFG;LhLH@Ox-N1RDxu(iX3af??)PUYRHoGVqm z*qoYcGU_eHuw}|9-v+ijiMY@~8e6PomE?=;TUvUq3&N-QX7L#D;RWo!QO_==hUJx$!~YR zA-w;&P<w_ar{hG1#&O~|Xo>L1fLXLhfxhj#C z+`SP~LIX*x0u-rAzDK`NdrmLxC|WAy>eyfGIeic{)ndm+ zo>L0v&nbjBol^?XeHb{M(}{ZDbWSbUbO8=BAk3Mr9VF;Z%CS?`b~DI)xm!?WUaF1# z2P0b_qUz5Po~n+N`aF?ckge79opyy@z8GOcPY>UP#U7->n~N6AEugI?($eFqovQrO zSp3lei5YSX?!|sW7OPiB8XHSw+CPba51uzBHQ!c*_-^KdbS|Tv098| z9Ad=?D?%PXazqUuTN`RmjOk8stN>niU<3t`<8DcADzc z4X(ydeUA_z5!TaB2%(5iq^BI&-%phapfKUdPdER5>PHA64#-+;iXJR|#vl+vHlC+m zz1qS}^s7lk_=)UhkSzu)K=9spYOa49?N9)ax6Xb!H$a#}9E*Kq%NP6UYdL$kqC0!} 
zVS+AlWVsqx+RuO`c|ar}KAky;W4W)2_m1dy=YaNRdlATH046VV^Ie{`SmBubt6OXm zk=fsjB_mA7@uxB2Gr z0M0sASLiGAxYpl|`^%lMzD~xUtHYSBTKa9Vjzl?f9_L@v0TLBbC>?rWYk6CLpStl& zMu3Yg2Gu+Lk~O65OAG|?7W1}1xv}_R0V>u~u(tay&mx2%x-Z6@NgnUP08Aq9j{pyz zYH`MoebzYaJ9%%==>wBN3={S)i#hw&;R4&39t>6Hb;SI&RoWUln*5pxkTB)H8j$;3&i|htC&V?4Pvo)_&iKLqLSZ^6xug(gMc>T`b9?$@qDyeLVRY zpVLPM28cI+1s^g7=8_K*H{<6@SbFxL)5l(N-3@EbDTU>(GQa?XlCDH8H)92g?Bf}R zQXjb2`iwQU^U{?I#!4=7v^6Z&`Bne|*9S{}*~?tc>BMiy*bsyHsKWY?V@#qM84-~r zNzwsA3K9e*fs&L|LLU+!@lX)uI0m8^#u#G=A%qY^2q8p3ghoIl5fPCO!&-4|T1=6f zj>eOZjIYcxS^2J15v!IdH2jcnIx9w|p2Q2hN2C5HhY;!xl+7itcdI&c9QIr(^MLCE zLXBl)_xGl1CE^64tD(AT*|_g<7%#wWzFc3`;kUkUvEUHWxq~fq-cE z8R$QOyz=BCN*ARJ1?vDpB0CbtHb7he%dbEVe`YWD0|LZ8gDl1R1RH3rMG0_2Mp$y% zs!tG7^*K_kt%$ujEi8iTC3o0etXyXHP^2V5DLIz|BpS60~aiWKN&U+%xT{m8aRS%b|FTas{AEeN2h;wJ&*Z+|SPF+e$ASoHyB07&T6 z)T+5xt~s5=>5&m=;xX*XamP5oxvL9(!mu3X$3&0cOr3k!crhI(RkYaYoU~@Hb*5dA z9;WHzHqVCG6Jmh?1e#qeIS%M^ctAxcg-_4sD43r|5EtOMu+AtP(f?Sl=E~9`>jL|O zNT1I5vTi%;x$Gpm-~*dO&xWp>6_*VMd}ISJ3I#iEJYYWi`_2Pl3XOBmQau<($M~Gr z=fjo?NX0CBmjh*|*$0r!jR3!-qsnB0p5y?(PUuWYb5wwym$R(gah{Cu3D|iK!C7)B zOjO&&DsZz&4z9bd3KjF?z}gW(q`n`eR2F=59}q8JnwGTa+*Fxb&iud~(;9hzWp>V* z_|>(v+-{r0My0X}Y-sK-}fG$QA%W*8gLJ>q>$7YSV9Ik0Z>etXPQW zanNPqKOdZodNSQ$&e^KLIJ~Xe3Me6z&7V!SiS)@1TmG}s#YPTO6}n+AGV{#H#ghqo zjH}F^ncWNWa;>o$;$)nR(2pD7=wsHhKFv0^vT=qyb16R%FTMwnOhF4j3&bpUJ;i55 ze3leK*DCa4%0k+q9TXW_Uhq%G&8zQ_(o(dIN3EEmbiOHs((~n0oUMTxpD#R{8K_8S z4Js=MZVL1-q#UfGfKEV)@Db7Une7ZBqtuS03zmQow1PwN48eUwK+B<^^GOMd_MR(m z)P_2tmCYPR(l^d^ns)BDt6C&x&0FHv6)uV1ppkbiN^_(Hp05lBgP?$gkwe<(Wei+6 zaZhXVF32O?h>F;wxFBx>aatT`SCJ&BS=!+dPOA~~Kxy{ zZ^Sr4J&>&}Ix1DqX#SqKiy;AW8Z<&7A9Xi4<^)T+(NKZ|89)@oATMyTSoMO^-}8vm z47lT>h(3wWy{}62&KBW-le*PpoKIZ=_}>Q|=nSt&{(K)rexq#x}6G`K4z z;gF)jsv5a5QxSy+vx$Cm2WM~|H7ukPLL$Y!kJ$K&wvM}{c48Im91#i<`*ql1E~zJO z*7Zw9Oa`Vo$?|fP(z|75JnDF1Jdb$xWB}kzI^GP$dY+d z%#Y;fCmThmS^Xx}3@DV{&bA_l%_|FzViG{(lm}Hvq5;TvzA=d>FkyzYKT8>+Ji_`| z(hZ3lj8SeaB%V-yq&TF@Q#wt%utKDJDe^zF{BGJOv+j@D~)67--S2WU^ 
z4_aRlQ;>_|8s1(M&nJH4)^c1zN_mo)OE@x}!(|k$Qle(iI;C6#A`Y9#&8w$#ER9qJ zdU_Wp3^6{W8x09<2o3=WG!@PX?R8ju zcGIlzmrDQiKt?gj2W;^7=o^Zw*bEC#QMCuw)*O(;F!?1?Xp)sN>m(!*O)PlyGRC`J z36^|)l(7{$gXGA7$onR-B;kZpj7A9gJiG}Ji{TG(@(D*`XbbT=D2^3mh;;id6eLEC zeOT%oRyp%f+O2`y$jkjUIT)*N8ssfNc3p8!6>ET0?fO*@HmCQYx4-x?8^d6&wR3?+ z0)yk-X-Mi2Ko1W~{|BRv{pW(G5)LxR#wO8;hJa+N zjgPilq)^F+D%|=Mt!P+Faj^ILDCs^{+%0PsvPdbNQNSY_X(8{ps~94h1Qb`T&1WVD fY@o{Mrf>6DEbamg000000000ewJ-f300961FK(E+ literal 5564 zcmV;t6+`N$6#xK#n*abPwJ-f(f11s#0D1=*FGApoN}2~iu@Mr=g9&p8gvMY^fBgSO zs@hs3*34*PBCM08WkL3hB1ltaAJFe> z#yBMvE7fA|UEZaDSzBKM`Q@Nf2rzT;JQ*(7h7B1U9yWw%`b^ubN8+2;Hn6pX!@RKJGK!XQ`?(&+0 zyZ^oC6v8w`cE|RdT2yGkcQb!ZDXOj+x$fwkTF7Mgsx!TgnyZ^%@j12F5^Bi=pcRDydo^V&+k z8_8=Z-w^2=Rxb+)5{IAAxDpGe$?8s(d2dN`p|iVm6!#zrm_ z#zTZs4-QmK;9wCJ;u~|Or3|mZgQ^^X_@`!lr)^;e(sW}%YP zDXOvP)VfSlijA$-<;pS+2L-!f-~kOdV69*4fdv>?pnYS{+}+)S0}C`T?K^#Qcltn+ zv3|Kbbzp%87zj=++&$eZnHtx}X1t|yTJb!Q)YyV$bWSTks`ESJj0a1g1Qu4XRoeJ@ z&pDOg+DKarm54}Of6i&6gwsG%LXPrB=bTp1k>l8{vE!Ux*pOY$X@xWCE6IuH<|;u7 z?wnFcJJQqD@6KssfttrzwAq|iXcCxgPAzZ;xkA5Wb2?#0kWnYBK&EG;s5!N8b(KsB z5jLk34_>G)k>X%;ia}DW*PL2#t~sq}l4qo7+WAUSWdE(^bV74#!6I%lrx82~B^+~# zp#+S~X=A||`TI<}TFK4d9&;+eSLBCEW^DOruoWKxMz#rvkR%sK_%dg`v{mfK(n38$2hX$%8djITyU zd0kRPSDm`dbP7uAwz~JEWG^lo*jx@;8F_ zzV_+|+gk57cn%scu3SsX_w1cjwI&tC7{w{(HK`PbjDd=iz-}ZTJ<)!Kd5_@M^TzxD z9v%*NtIaysauO<7m#X4&PBF%~QtqNG(t>EB&$p?1c^IS939hl%99)cOdW?q%hvMq= z7ndg?Q#Qt_VyjLqyG$h)W}Q6^*g|Gxk#7$9>Fzq!wE+&{jJJzu$h%`Z>zd8CzSIM!MGkZCF2jxch7Rkibtn%efH5s^` zYBNsh3ZuZZ?-XTp3SqgsI?~04khuA2E~Z?ut`X_pj;%BYcS{MQQwSIvxo{OS*`i&dTNL9IMX@=Zde1QxnLD4N zz{q1*y1((B<~>cOk178R$UdyIn8-+arZ07lQ4YI6ajKUZ06 z@F|T^uq}#OEaMczTv3dT3!G08fP_vVFg9|**vL=&PL5_HjCT22Pf^RKRC6v)*|{v$ z7N%Ur&H3O+uD^HlE~C>4(^MfHG5>sFU_2bkUCljc;cHUKx~5_xbm}>G1>dSHw#ngQ zXR11sJ9ry zmMNor8`$nd;zIXmY_XPMDy%Ci$5g2-bf=iNq*0C2^nnoHnG3u>gOJ zhluM-Rx4+dOH~THQaNW%N$E`0Xr+sf0Zqn^t#@)RI=ui%k2Cbnep+Zf zxPwj`vq7hg44g4UG08W>G3eAn-M?<6HU^zqFz6J6y$zesX@$ljzuo4T`L7CRR5oK`r0P9enU 
zoKk@9!@%jBQq=pVb6SB-7Yi_Fx_*$LJ1NIbRol%V^W|P!&`jlXk*+&kG{?n{j-phg|tVAW;H7|7F{iN8tgRHsU2L6pZXplJR+>8 zpYTx;pGZ$RvcI1y4M1VSlAmt={nU>TJ{pg;*c3fj`ix273Btx>uKGxmss2-y=xIJa z=o*uo>xjqqLf_oJn#6tP9(YWU{ivZvqP+Dh84uZCY&m1!9+T&(SFg5k6a8uv5q=`O z8Dxt=3lF>}PtEmjqa6m|@z&WdCx-{~M`N*%Z24k8eJy7XS9E7DLrl;`hAcM&OZyS9 zB96zz!>2R%Xe{?t@!k;q?iA48Y!3q2GGOvTH{a!1ix!T_zq-XX0ojd3ipHR~s+IbT zMGR|6EJYK@X|VGNpy2E|7BcB-KdQDa_^FY1OGE^bs=TdJzRfp}18~-{x$F=@W z?k{)3`Z^hZt`1|iYU#Jd8WQEmd7OVu1x!>(p>zm>t>ta~ed^?wi~tu~461kfC2L6A zml_1%E#_^3a%1sB3@p}Cu(tay&mw#vx-Z6@MI7%z07xM3PXG^|YH`MoebzkeD|v6w zsRNV23>5Y*i#hxD0Rx*%4~8o9I%59XD(ws%Nq)@&NSN~949N2Hf_-hIrK=@K-{v1S za5UnV!{>`F_D@#u)_z}!BS3`3^6x8Q(t^eWU95(Hqao0l5RvTH)92gtm9b+r9N=4^%-Yu=cOwb zjE!96XlGcg^Q}Mxt`CmQ;O$+!NC!O^r*u6kYh}u85t3gBuUZ%G71s|C5e)h zR6-vTAn{O;BoPsj4|wts@RjM2mG8Jm8CfkYO2<571P_ir5y>RWDr?94@;Y z#`AFVE!S3c2#qfsMT10@BO$3a=-frt%>@=mU?Uo?43wWha(Oa}(zuqP03LvsWkTYp z0vrw?>=o$6pE1h4On~CfB9>x(Y6n{DdjdL>Evm>u^@-r9`UxpEC}fT&mr~%mk^$_Z zac=#0Y$wc?KC|AC5{a@>$upj7kis#miw~oJ6w`Ewkm7_)db@F##(KH2Oo!!sw2$$JERY%}(nT1Rmy)2Xy=F8!fUcSd zZB1vrE2SzI5Cr4fGU58(bvHx~b6_CZvfSWzbbif2&*^iPvic(Efdb>7ot|8@K_k`+S>(9KA}d)6xWkHt^agy@ zi8Xiv14HnK%FRHsxa1$AU|Mr1m#*|L3#-;kBuKp~l`Z^dyRwRwSEN8MyYm0d`$w`R z${H+u*sg@D+JXRTDt-bWfAwSGj6q8J!m2Su0s!Ksre4pz;+oS*upY?)O+1HvIWEKp z^6u)|PWYw6w3z78o2mK>TP~&}Lw+sB4F^q_YaM78q@2k3NcwXjE<+?$0D(3ZOTMEy zcmtqb&_<`%GQzfm6m%PQ97sne5YhfvFLNd7taS(bgG-;<__EGB>qT~uqt3(f(6glL zvd87j03EsVMYX|>gA166{e7pCaAxD&Lst)p(Tsb}tM{>Lrm;pJ2-cdk!H>I8r^Q*oAh+8NdHSOp$7NyBy0Rl#Di99Sbo z5P9TBlgh#mt_9+GOJ|8hI(Jd#;WK4G!NiWdnKD6Ua)8=Ag^9%_a|Xs~l5HhVIca+J zUAAuS01$WiEwTkbkoEuA;JQ+9#%*GcjAz6?m30pB^c=bf{HK8?BAHD0o^!TpFb;jI zwt`CtbK^&oEqMB5RV_bB=`|yVsS4dN7nylxmD!L+Xx?SGLB>@O2PsAsMFQvB z5RCx=0tjOK$Pg6p0ziV42sMPZs>DiU^bV+_P1Q&elW;Xx?h+~ygwUBfwI$I;>$duS{f2;^Oz5Qsfvyz zK=Olr<1)c5R|7Rh)1qNp97R2>uSvo`&pA1m&&2D>DV=d#GyP(yV`pbK7ACzr z!AFHelxjU7qR`!P`bdJ&fm4(*-L%p4Q=-ORCgBK@okW?=*W?a!=k!yg>hU8g_q~gC z{9@7{wfhb=Tuw?*xcV7U`C1gE5Ky4rAL$3nf0|xTNz|pNkc39g&s0Q-JA*_&wZj>j 
zM-2)2w2(+~?;|$sL%8Bvq#OZPqps)716(y3^W1vQ*)g4RNzYV=PBo&P1C+T}~ni7d^NV!la!F4-uBnpJI5&48iowr?xa z*u1jhC?;_lr!1&KN)5p71s#)k$zwAl|14FA^62YhNiZaCFh;qxlXyb;k?@c%PwzDC zunLh5tc-Bzd&Fb7iF(TXe?S)9c&10qMq=;R)-=&&LM9r5k?mK6jxQEq^7anZNrMu! z?&3if)pik16;o z7>y9|d3YN#7Q>&#;}ed=&=%r#P!cQ15b5?^C`gPN`>@nGta9d|)LR4Fk(VQEawJyY zG|XFo=(^&ZG}ZvA=Jl%|1A7Q5+EXc~vG9TQ3IWM)8^30^NY#@MRTcHgSkbUX;$YAH zD5XADiCb0{vPi``qZmfC(n8*GS2Bc}1lHHDIlH)(qKDRMFm@D}bp#Cn00000001bp KFa00@0RR93v4Kkf diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.index.crc deleted file mode 100644 index cfd3665d8ceabc092c98b6f5c6fe4bd7b52304da..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}BiCaMf!76XFB` diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.metadata.json.gz.crc deleted file mode 100644 index 3bc8112b6871aabbd3633665f898363eeb350f6a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}6Zmsl6Kj5@Z8R diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/index deleted file mode 100644 index 3b6113f78c85dfd6b45c84ec875c1077703f8d14..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 110 zcmZ=`U|_HVVvVi(e--S`GBSK&VqjolNzN!TT;$Kh?C9*w=*Vz9kcr6=$PjP*ctM+q r(VfxJxNZAIAS;B?-IrN}g{kS$1Oo-2DZ((5gl94^Ffsy7LNy2go#`1X diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/metadata.json.gz 
b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/metadata.json.gz deleted file mode 100644 index 2b31e5a9da3dd6974a7cd3d6ab3f0047d619cd9b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 184 zcmV;p07w5HiwFP!0000009B5`3c@f9hTkP8g&u~EIeQa94+@Hk7x7SQwiZh(Z3dRH zcQ^CqB?SI2-=7{D>+p&(+2BD|RZ-Rg9#MI1tZz3;Q-~nTVp$0=Xj?-dX98ZBFZqoi z;Y_j9)VrX&B=_T-Y)@-5uctyehYpim#i;U)K{nn85TLvdq=+m3C1d7ym=iY&u>Ogk mqawq=Z|5|IK!>JD$Zsr|QN-3qN607rFnj?OTpFOy0000svrTUR diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..7013243ff9fd1b59dcb832d14eddcd98d1c6b955 GIT binary patch literal 12 TcmYc;N@ieSU}Bhf^4Jdm6r%)F literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..359650e81674052c8db801f7ef6696b142f31b71 GIT binary patch literal 12 TcmYc;N@ieSU}AVWt7<<06qN(> literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..2196f12697914ab3e1fb7705a6b30e48c677e29d GIT binary patch literal 111 zcmZ=~U|_HVVvVi(e--SmGBSK&VqjolNzN!TT;$Kh?C9*w=*Vz9kcr6=$dGOPctM+q s(VfxJzHR$OAS;B?J%*Wu$$#OD_A>?wKx2eqMhVYkU|?hf8ii^S0NR=wC;$Ke literal 0 HcmV?d00001 diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..521ca22d19aaad9700d820d725fb93cc43d0d61f GIT binary patch literal 184 zcmV;p07w5HiwFP!0000009B5`3c@fDME_+^3ON+5wdN**9uyQ6FXADs+a?$iNw#1p z{dc$CybJ>~`(}G+jKv#OntTA6XRAVhM$>8nITP^0bWLvr z2^R`oO}+JX7v*8p$?m+-^KvSbwP-QQbqFfmxyZ!F00NYAo)q!qKV;0@9#iB+0meP? mb5f+a@bjDo@9Eey5&4bff=R^Gsw3o+ei**=>PSz|0002eL{k6& literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/metadata.json.gz index 5a194f66847ba175afe52c6732e2aaf0a17b7d27..50f4a7346adcfd7e8d0f7dfb335c2ad21ecf6d74 100644 GIT binary patch delta 366 zcmV-!0g?Xg0{sGz7=Jg>k~i5vL?2`uzLZk3^lpo$Nl0!TW%R$Bc4^1N2VdIe-0z&+ z-sD7fQbF+z&P$^Sdw87BQc4b7s*6abli4JhrFQ_lOq1!9Cn6z~IM{&$Ex;EJRAD(D z0(?;ef_xj|v9S;;n{IL1E3@T_tG_q@ zC+Bz*#XP6AEjb3m8Ze7tf-B_#Su_sW`&yP!Yx7a{yt1)3&(7I@ZJW3B3mTRwBWh2> z2B5&_)S2V(3w?#Xb0PK>t#Q*>R delta 350 zcmV-k0ipi=0__5j7=Js_k~i5vL?2`mUq&fedbdW?CM36xQu5zTYrENj;7e)G`OfL( zCN0@Z1=$x^Cv`>G-Tfj?C@Hv9XOYZj@hpmy8vvds(R|J$5fMr(?7>1Mz-1OxW;hlB zKC1yiz6M@zQkBrwSJ_EdVbST}~CBrrw7|3SAmqL4}U6mi+9n zf30}g?RNdXT33$9#(?OL=X53`?}Mt3|IhVs2pMisVz>;M(Dg9GQ1Cih%ig&x8~N>R w+Vxm%An-70IEh1MO6Ag_tOTm&85+n53)=+mL)(z=uT>xQ-+IZz56=Ps0DL&G5&!@I diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc index cefb7d4b9de03afa17451d74601f862b9d68b5d0..b87073cd48685faf47474df6d566664ca194d234 100644 GIT binary patch literal 16 XcmYc;N@ieSU}9)d(oL=m`Tht1ASVSE literal 16 XcmYc;N@ieSU}BJZ?EQATnZgPHBsv8p diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/metadata.json.gz 
b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/metadata.json.gz index 604320d847c286c9ab4b7f8216a0345b556cc3a9..a63aaa6698f0300b757e83fa52c499ca5e2ee4f7 100644 GIT binary patch literal 621 zcmV-z0+Rh7iwFP!000000Nqy6ZrU&u{TDxNTGAyU1e&+PScOzg6I6Rxg(4H*fZO0i zj+x3({(aX@2_#`ssnnP90OZ``d(W|t6z_?k8z3Q>3I@->X1~78bBH}+k?09_Bzz!g z7HBzuybG%YGM%nWY!5L#seH?Ukke zpb4(?*YYquU?z?d(w|hV5x=_nYUmnGfhHz&uS()+AQCPM6>;`l9dqDY82CoSj$EvS z@0jA!%95l2RV$%FWSEs|^~sE~Je$O$l@lY4fN2Al#ngl{p!ctfPSzYdqBNj0DfW*V$ZStYu~q*12(jsfy=z5 z>(HR*n47}mjEWVEvaMix{hr`EP%3S#x>w+zD#JK6UP~9{95PHo*Kz%zJYGF;{>^xd zlpBI)PC&3%HoA8Et{V(nz3!27YdUcer(1^UHrmiU)YwE1(MYN7P^F}vRhIt%rfX%l HItc&(-oPjy literal 619 zcmV-x0+jt9iwFP!000000NqwyZ`v>v{V#snw4`eS32EMdu?nf0CaCtX3PmQq0k^?H zj+x4k|NE|!67s>MQmHTH0m!+>_nu=PDLxQEH$Xx%6%3w%&3=8GZy|PxMWQFzlkkD0 zS-i{fnWibEkdph zHK;C@ZQ7V#EVME7=fzON%ANWCbUPcbd%@>QX*nN>T&&tE%UZ=~i|b133Twz(?vey# z>p1QvBgYL!r;3Ut$6sm%vX za1+3mhv^Y3@+cwwN!1$htE;aC-JmJZq)6SXqF4_^!ey=^&YsI-_WKqlzLBvL7c1dA z##mcflH{N&B~*wEvr?|E&CvR65>HlYMj8P#1}=?lrK+Q;rRIYKP=ORQ{eii8TcC=0 z62$ok6^V@!o~0WC&lARZIVGgSuXk@SJyzRT~`tqcnv+^G!A2GbR zX>WWt6?<^non{wJO!F<3>N_{@e!LEt2m9LY1zzAVhuZES3GBY-Ciaa#2<(C9x*khn zzfYINRpD_)#R^8*PB63nK=2(Xl`&RcEbvd6VVoMTrGs*g8784~?eu+px_jXK;dqRc zA;B{zAlNG#-8i1(_(7}JJ#ub~PF%$4j$yhBZD<}UY+{BOL>oI)DQRYD{STD=4M#f( F004Q(D3AaE diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-38581d1a-27f8-452f-9678-75225dfc64ab.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-38581d1a-27f8-452f-9678-75225dfc64ab.crc deleted file mode 100644 index b9ef33653c510e8296c5690159dac303b4f698d4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(Kzwk5w5~c%j diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.crc new file mode 100644 index 
0000000000000000000000000000000000000000..2a7dde15ed6ca272aa3baaef8081bf56e1baa88e GIT binary patch literal 12 TcmYc;N@ieSU}E?);n-II6`=%z literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-38581d1a-27f8-452f-9678-75225dfc64ab b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-38581d1a-27f8-452f-9678-75225dfc64ab deleted file mode 100644 index 963999a2ef1ae25a21bc7f7eed2044621d76e702..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 118 zcmYdfU|`4qVvVi(e-$#8GBI3XW@JguC^B5+&&2HL?9AxM#Kg?Vz`)3GJdlaWk%5Vk zfr){UvGL;tZ6-!{hPLe&fpiF?J4hHP&cMN-5aZ9ZYiAhi=VG?=7cAGWJDkJB3$y?P Kz?J}wVgLZ!ff@J! diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630 b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630 new file mode 100644 index 0000000000000000000000000000000000000000..1d7128c9f7ba35bd412330fdb4772c2bb6278c44 GIT binary patch literal 123 zcmYdeU|^^MVvVi(e-*0EGBIpnW@KPV&L}cm~Px$iT$N zz{J4F*!b~+HWQ;eBV*h4i$F#Qh{?#n&EPD-aENhLKzVauvTJO^BxjAw_OqE(nNFON P;04+O0$`hf#xVc@Zp#}b literal 0 HcmV?d00001 diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 66b6334c95..bef76459b4 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -287,7 +287,7 @@ def _parse_sample_data(self, sample_data): project_samples = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) for s in sample_data: families.add(s['family_guid']) - project_samples[s['project_guid']][s['family_guid']][s['sample_type']].append(s) + project_samples[s['project_guid']][s['sample_type']][s['family_guid']].append(s) num_families = len(families) logger.info(f'Loading {self.DATA_TYPE} data for {num_families} families in {len(project_samples)} projects') @@ -297,13 +297,12 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ if 
len(project_samples) == 1: project_guid = list(project_samples.keys())[0] # for variant lookup, project_samples looks like - # {: {: {: True}, {: {: True}}, : ...} + # {: {: {: True}, : {: True}}, : ...} # for variant search, project_samples looks like - # {: {: {: [, , ...], : ...}, : ...}, : ...} - first_family_samples = list(project_samples[project_guid].values())[0] - sample_type = list(first_family_samples.keys())[0] + # {: {: {: [, , ...]}, : {: []} ...}, : ...} + sample_type = list(project_samples[project_guid].keys())[0] project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) - return self._filter_entries_table(project_ht, project_samples[project_guid], **kwargs) + return self._filter_entries_table(project_ht, project_samples[project_guid][sample_type], **kwargs) # Need to chunk tables or else evaluating table globals throws LineTooLong exception # However, minimizing number of chunks minimizes number of aggregations/ evals and improves performance @@ -314,14 +313,13 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): - first_family_samples = list(project_sample_data.values())[0] - sample_type = list(first_family_samples.keys())[0] + sample_type = list(project_sample_data.keys())[0] project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) - sample_data.update(project_sample_data) + sample_data.update(project_sample_data[sample_type]) if len(project_hts) >= chunk_size: self._filter_merged_project_hts( @@ -342,14 +340,14 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ def import_filtered_table(self, project_samples, num_families, **kwargs): if num_families == 1: family_sample_data = 
list(project_samples.values())[0] - family_guid = list(family_sample_data.keys())[0] - sample_type = list(family_sample_data[family_guid].keys())[0] + sample_type = list(family_sample_data.keys())[0] + family_guid = list(family_sample_data[sample_type].keys())[0] family_ht = self._read_table(f'families/{sample_type}/{family_guid}.ht', use_ssd_dir=True) family_ht = family_ht.transmute(family_entries=[family_ht.entries]) family_ht = family_ht.annotate_globals( family_guids=[family_guid], family_samples={family_guid: family_ht.sample_ids}, ) - families_ht, comp_het_families_ht = self._filter_entries_table(family_ht, family_sample_data, **kwargs) + families_ht, comp_het_families_ht = self._filter_entries_table(family_ht, family_sample_data[sample_type], **kwargs) else: families_ht, comp_het_families_ht = self._load_filtered_project_hts(project_samples, **kwargs) @@ -399,14 +397,6 @@ def _merge_project_hts(project_hts, n_partitions, include_all_globals=False): def _filter_entries_table(self, ht, sample_data, inheritance_filter=None, quality_filter=None, **kwargs): ht = self._prefilter_entries_table(ht, **kwargs) - # Temporarily reset sample_data until full blended eS/GS support is added - for family_guid, samples_by_sample_type in sample_data.items(): - if isinstance(list(samples_by_sample_type.values())[0], list): - samples = [s for samples in samples_by_sample_type.values() for s in samples] - sample_data[family_guid] = samples - else: - sample_data[family_guid] = True - ht, sorted_family_sample_data = self._add_entry_sample_families(ht, sample_data) passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht, **kwargs) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index cad6a0b3d7..90436bea27 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -1,4 +1,4 @@ -import os +from collections import defaultdict from aiohttp.web import HTTPNotFound import hail as hl @@ -310,34 +310,29 @@ def 
_gene_rank_sort(cls, r, gene_ranks): def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): # Get all the project-families for the looked up variant formatted as a dict of dicts: - # {: {: {: True}, {: {: True}}, : ...} + # {: {: {: True}, : {: True}}, : ...} lookup_ht = self._read_table('lookup.ht', use_ssd_dir=True, skip_missing_field='project_stats') if lookup_ht is None: raise HTTPNotFound() variant_projects = lookup_ht.aggregate(hl.agg.take( hl.dict(hl.enumerate(lookup_ht.project_stats).starmap(lambda i, ps: ( - lookup_ht.project_guids[i], + lookup_ht.project_sample_types[i], hl.enumerate(ps).starmap( lambda j, s: hl.or_missing(self._stat_has_non_ref(s), j) ).filter(hl.is_defined), )).filter( lambda x: x[1].any(hl.is_defined) - ).starmap(lambda project_guid, family_indices: ( - project_guid, - hl.dict(family_indices.map(lambda j: (lookup_ht.project_families[project_guid][j], True))), - ))), 1), + ).starmap(lambda project_key, family_indices: ( + project_key, + hl.dict(family_indices.map(lambda j: (lookup_ht.project_families[project_key][j], True))), + )).group_by( + lambda x: x[0][0] + ).map_values( + lambda project_data: hl.dict(project_data.starmap( + lambda project_key, families: (project_key[1], families) + )))), 1) )[0] - for project_guid, families in variant_projects.items(): - # Temporarily use try/except to determine sample_type, to be removed when lookup table contains sample_type - try: - hl.read_table(self._get_table_path(f'projects/WES/{project_guid}.ht', use_ssd_dir=True)) - sample_type = 'WES' - except Exception: - sample_type = 'WGS' - for family_guid, value in families.items(): - families[family_guid] = {sample_type: value} - # Variant can be present in the lookup table with only ref calls, so is still not present in any projects if not variant_projects: raise HTTPNotFound() From cd7ba4db108a1fa1339549373d61561736954f12 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 22 Aug 2024 15:34:45 -0400 Subject: [PATCH 655/736] 
test airtabel udpate calls --- ...eck_for_new_samples_from_pipeline_tests.py | 49 ++++++++++++++----- seqr/views/utils/airtable_utils.py | 2 +- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 3079e54c0b..19a34945a9 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -168,9 +168,9 @@ def _test_success(self, path, metadata, dataset_type, sample_guids, reload_calls ]) # Test reload saved variants - self.assertEqual(len(responses.calls), len(reload_calls) + (3 if has_additional_requests else 0)) + self.assertEqual(len(responses.calls), len(reload_calls) + (8 if has_additional_requests else 0)) for i, call in enumerate(reload_calls): - resp = responses.calls[i+(1 if has_additional_requests else 0)] + resp = responses.calls[i+(6 if has_additional_requests else 0)] self.assertEqual(resp.request.url, f'{MOCK_HAIL_HOST}:5000/search') self.assertEqual(resp.request.headers.get('From'), 'manage_command') self.assertDictEqual(json.loads(resp.request.body), call) @@ -198,20 +198,21 @@ def test_command(self, mock_email, mock_airtable_utils): responses.GET, "http://testairtable/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking?fields[]=Status&pageSize=2&filterByFormula=AND({AnVIL Project URL}='https://seqr.broadinstitute.org/project/R0004_non_analyst_project/project_page',OR(Status='Loading',Status='Loading Requested'))", json={'records': [{'id': 'rec12345', 'fields': {}}, {'id': 'rec67890', 'fields': {}}]}) + airtable_samples_url = 'http://testairtable/app3Y97xtbbaOopVR/Samples' + airtable_pdo_url = 'http://testairtable/app3Y97xtbbaOopVR/PDO' responses.add( responses.GET, - 
"http://testairtable/app3Y97xtbbaOopVR/Samples?fields[]=CollaboratorSampleID&fields[]=SeqrCollaboratorSampleID&fields[]=PDOID&pageSize=100&filterByFormula=AND({SeqrProject}='https://test-seqr.org/project/R0003_test/project_page',OR(PDOStatus='Methods (Loading)',PDOStatus='On hold for phenotips, but ready to load'))", + f"{airtable_samples_url}?fields[]=CollaboratorSampleID&fields[]=SeqrCollaboratorSampleID&fields[]=PDOID&pageSize=100&filterByFormula=AND({{SeqrProject}}='https://test-seqr.org/project/R0003_test/project_page',OR(PDOStatus='Methods (Loading)',PDOStatus='On hold for phenotips, but ready to load'))", json=AIRTABLE_SAMPLE_RECORDS) responses.add( responses.GET, - f"http://testairtable/app3Y97xtbbaOopVR/PDO?{PDO_QUERY_FIELDS}&pageSize=100&filterByFormula=OR(RECORD_ID()='recW24C2CJW5lT64K')", + f"{airtable_pdo_url}?{PDO_QUERY_FIELDS}&pageSize=100&filterByFormula=OR(RECORD_ID()='recW24C2CJW5lT64K')", json=AIRTABLE_PDO_RECORDS) - responses.add(responses.PATCH, 'http://testairtable/app3Y97xtbbaOopVR/Samples', json=AIRTABLE_SAMPLE_RECORDS) - responses.add(responses.PATCH, 'http://testairtable/app3Y97xtbbaOopVR/PDO', json=AIRTABLE_PDO_RECORDS) - responses.add_callback(responses.POST, 'http://testairtable/app3Y97xtbbaOopVR/PDO', callback=lambda request: ( - 200, {}, json.dumps({'records': [{'id': f'rec{i}', **r} for i, r in enumerate(json.loads(request.body)['records'])]}) - )) - # TODO test actual calls patch/post + responses.add(responses.PATCH, airtable_samples_url, json=AIRTABLE_SAMPLE_RECORDS) + responses.add(responses.PATCH, airtable_pdo_url, json=AIRTABLE_PDO_RECORDS) + responses.add_callback(responses.POST, airtable_pdo_url, callback=lambda request: (200, {}, json.dumps({ + 'records': [{'id': f'rec{i}ABC123', **r} for i, r in enumerate(json.loads(request.body)['records'])] + }))) # TODO test paging for patch (MAX_UPDATE_RECORDS) # TODO test with an error in patch? 
responses.add(responses.POST, f'{MOCK_HAIL_HOST}:5000/search', status=200, json={ @@ -346,9 +347,35 @@ def test_command(self, mock_email, mock_airtable_utils): ]) self.assertEqual(Family.objects.get(guid='F000014_14').analysis_status, 'Rncc') + # Test airtable PDO updates + update_pdos_request = responses.calls[1].request + self.assertEqual(update_pdos_request.url, airtable_pdo_url) + self.assertEqual(update_pdos_request.method, 'PATCH') + self.assertDictEqual(json.loads(update_pdos_request.body), {'records': [ + {'id': 'rec0RWBVfDVbtlBSL', 'fields': {'PDOStatus': 'Available in seqr'}}, + {'id': 'recW24C2CJW5lT64K', 'fields': {'PDOStatus': 'Available in seqr'}}, + ]}) + create_pdos_request = responses.calls[3].request + self.assertEqual(create_pdos_request.url, airtable_pdo_url) + self.assertEqual(create_pdos_request.method, 'POST') + self.assertDictEqual(json.loads(create_pdos_request.body), {'records': [{'fields': { + 'PDO': 'PDO-1234_sr', + 'SeqrProjectURL': 'https://test-seqr.org/project/R0003_test/project_page', + 'PDOStatus': 'Methods (Loading)', + 'PDOName': 'RGP_WGS_12', + }}]}) + update_samples_request = responses.calls[4].request + self.assertEqual(update_samples_request.url, airtable_samples_url) + self.assertEqual(update_samples_request.method, 'PATCH') + self.assertDictEqual(json.loads(update_samples_request.body), {'records': [ + {'id': 'rec2B6OGmQpAkQW3s', 'fields': {'PDOID': ['rec0ABC123']}}, + {'id': 'rec2Nkg10N1KssPc3', 'fields': {'PDOID': ['rec0ABC123']}}, + {'id': 'recfMYDEZpPtzAIeV', 'fields': {'PDOID': ['rec0ABC123']}}, + ]}) + # Test SavedVariant model updated for i, variant_id in enumerate([['1', 1562437, 'G', 'CA'], ['1', 46859832, 'G', 'A']]): - multi_lookup_request = responses.calls[3+i].request + multi_lookup_request = responses.calls[8+i].request self.assertEqual(multi_lookup_request.url, f'{MOCK_HAIL_HOST}:5000/multi_lookup') self.assertEqual(multi_lookup_request.headers.get('From'), 'manage_command') 
self.assertDictEqual(json.loads(multi_lookup_request.body), { diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index 5b8fcd2821..d0d88e0025 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -71,7 +71,7 @@ def safe_patch_records(self, record_type, record_or_filters, record_and_filters, def safe_patch_records_by_id(self, record_type, record_ids, update, error_detail=None): self._safe_bulk_update_records( - 'patch', record_type, [{'id': record_id, 'fields': update} for record_id in record_ids], + 'patch', record_type, [{'id': record_id, 'fields': update} for record_id in sorted(record_ids)], error_detail=error_detail or {'record_ids': record_ids, 'update': update}, ) From b19b293b87f4d5ca1ba717a7b2cb74fbbe11fe51 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 22 Aug 2024 15:35:23 -0400 Subject: [PATCH 656/736] test airtabel udpate calls --- .../tests/check_for_new_samples_from_pipeline_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 19a34945a9..6bb554a7ba 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -190,6 +190,7 @@ def _test_success(self, path, metadata, dataset_type, sample_guids, reload_calls @mock.patch('seqr.management.commands.check_for_new_samples_from_pipeline.MAX_LOOKUP_VARIANTS', 1) @mock.patch('seqr.management.commands.check_for_new_samples_from_pipeline.BASE_URL', 'https://test-seqr.org/') + @mock.patch('seqr.views.utils.airtable_utils.MAX_UPDATE_RECORDS', 2) @mock.patch('seqr.views.utils.airtable_utils.logger') @mock.patch('seqr.utils.communication_utils.EmailMultiAlternatives') @responses.activate From 9e725365c69bc4ddf187eb42dae6ce0fc4534f58 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 22 Aug 2024 15:36:34 
-0400 Subject: [PATCH 657/736] test pagination for bulk updates --- .../check_for_new_samples_from_pipeline_tests.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 6bb554a7ba..46591ca1c3 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -168,9 +168,9 @@ def _test_success(self, path, metadata, dataset_type, sample_guids, reload_calls ]) # Test reload saved variants - self.assertEqual(len(responses.calls), len(reload_calls) + (8 if has_additional_requests else 0)) + self.assertEqual(len(responses.calls), len(reload_calls) + (9 if has_additional_requests else 0)) for i, call in enumerate(reload_calls): - resp = responses.calls[i+(6 if has_additional_requests else 0)] + resp = responses.calls[i+(7 if has_additional_requests else 0)] self.assertEqual(resp.request.url, f'{MOCK_HAIL_HOST}:5000/search') self.assertEqual(resp.request.headers.get('From'), 'manage_command') self.assertDictEqual(json.loads(resp.request.body), call) @@ -214,7 +214,6 @@ def test_command(self, mock_email, mock_airtable_utils): responses.add_callback(responses.POST, airtable_pdo_url, callback=lambda request: (200, {}, json.dumps({ 'records': [{'id': f'rec{i}ABC123', **r} for i, r in enumerate(json.loads(request.body)['records'])] }))) - # TODO test paging for patch (MAX_UPDATE_RECORDS) # TODO test with an error in patch? 
responses.add(responses.POST, f'{MOCK_HAIL_HOST}:5000/search', status=200, json={ 'results': [{'variantId': '1-248367227-TC-T', 'familyGuids': ['F000014_14'], 'updated_field': 'updated_value'}], @@ -371,12 +370,17 @@ def test_command(self, mock_email, mock_airtable_utils): self.assertDictEqual(json.loads(update_samples_request.body), {'records': [ {'id': 'rec2B6OGmQpAkQW3s', 'fields': {'PDOID': ['rec0ABC123']}}, {'id': 'rec2Nkg10N1KssPc3', 'fields': {'PDOID': ['rec0ABC123']}}, + ]}) + update_samples_request_2 = responses.calls[5].request + self.assertEqual(update_samples_request_2.url, airtable_samples_url) + self.assertEqual(update_samples_request_2.method, 'PATCH') + self.assertDictEqual(json.loads(update_samples_request_2.body), {'records': [ {'id': 'recfMYDEZpPtzAIeV', 'fields': {'PDOID': ['rec0ABC123']}}, ]}) # Test SavedVariant model updated for i, variant_id in enumerate([['1', 1562437, 'G', 'CA'], ['1', 46859832, 'G', 'A']]): - multi_lookup_request = responses.calls[8+i].request + multi_lookup_request = responses.calls[9+i].request self.assertEqual(multi_lookup_request.url, f'{MOCK_HAIL_HOST}:5000/multi_lookup') self.assertEqual(multi_lookup_request.headers.get('From'), 'manage_command') self.assertDictEqual(json.loads(multi_lookup_request.body), { From fb325161f43748e7c269a44f86b5c3222f138a38 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 22 Aug 2024 15:40:36 -0400 Subject: [PATCH 658/736] test erroro handling on airtable error --- .../check_for_new_samples_from_pipeline_tests.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 46591ca1c3..7a59b27460 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -210,11 +210,10 @@ def test_command(self, mock_email, mock_airtable_utils): 
f"{airtable_pdo_url}?{PDO_QUERY_FIELDS}&pageSize=100&filterByFormula=OR(RECORD_ID()='recW24C2CJW5lT64K')", json=AIRTABLE_PDO_RECORDS) responses.add(responses.PATCH, airtable_samples_url, json=AIRTABLE_SAMPLE_RECORDS) - responses.add(responses.PATCH, airtable_pdo_url, json=AIRTABLE_PDO_RECORDS) + responses.add(responses.PATCH, airtable_pdo_url, status=400) responses.add_callback(responses.POST, airtable_pdo_url, callback=lambda request: (200, {}, json.dumps({ 'records': [{'id': f'rec{i}ABC123', **r} for i, r in enumerate(json.loads(request.body)['records'])] }))) - # TODO test with an error in patch? responses.add(responses.POST, f'{MOCK_HAIL_HOST}:5000/search', status=200, json={ 'results': [{'variantId': '1-248367227-TC-T', 'familyGuids': ['F000014_14'], 'updated_field': 'updated_value'}], 'total': 1, @@ -463,11 +462,15 @@ def test_command(self, mock_email, mock_airtable_utils): self.assertDictEqual(mock_email.return_value.esp_extra, {'MessageStream': 'seqr-notifications'}) self.assertDictEqual(mock_email.return_value.merge_data, {}) - mock_airtable_utils.error.assert_called_with( + self.assertEqual(mock_airtable_utils.error.call_count, 2) + mock_airtable_utils.error.assert_has_calls([mock.call( + f'Airtable patch "PDO" error: 400 Client Error: Bad Request for url: {airtable_pdo_url}', None, detail={ + 'record_ids': {'rec0RWBVfDVbtlBSL', 'recW24C2CJW5lT64K'}, 'update': {'PDOStatus': 'Available in seqr'}} + ), mock.call( 'Airtable patch "AnVIL Seqr Loading Requests Tracking" error: Unable to identify record to update', None, detail={ 'or_filters': {'Status': ['Loading', 'Loading Requested']}, 'and_filters': {'AnVIL Project URL': 'https://seqr.broadinstitute.org/project/R0004_non_analyst_project/project_page'}, - 'update': {'Status': 'Available in Seqr'}}) + 'update': {'Status': 'Available in Seqr'}})]) self.assertEqual(self.manager_user.notifications.count(), 3) self.assertEqual( From 9d9ddcd44877f4d82e95971742d4052d5d9f3f81 Mon Sep 17 00:00:00 2001 From: Hana Snow 
Date: Thu, 22 Aug 2024 16:44:53 -0400 Subject: [PATCH 659/736] proxy header to s3 igv reference requests --- seqr/views/apis/data_manager_api.py | 20 ++------------------ seqr/views/apis/igv_api.py | 7 ++++++- seqr/views/utils/dataset_utils.py | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 1ad54f5d99..65d0090be8 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -24,7 +24,7 @@ from seqr.views.utils.airflow_utils import trigger_data_loading, write_data_loading_pedigree from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.dataset_utils import load_rna_seq, load_phenotype_prioritization_data_file, RNA_DATA_TYPE_CONFIGS, \ - post_process_rna_data + post_process_rna_data, convert_django_meta_to_http_headers from seqr.views.utils.file_utils import parse_file, get_temp_file_path, load_uploaded_file, persist_temp_file from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.json_to_orm_utils import update_model_from_json @@ -618,7 +618,7 @@ def _is_loaded_airtable_sample(sample, project_guid): @data_manager_required @csrf_exempt def proxy_to_kibana(request): - headers = _convert_django_meta_to_http_headers(request.META) + headers = convert_django_meta_to_http_headers(request) headers['Host'] = KIBANA_SERVER if KIBANA_ELASTICSEARCH_PASSWORD: token = base64.b64encode('kibana:{}'.format(KIBANA_ELASTICSEARCH_PASSWORD).encode('utf-8')) @@ -652,19 +652,3 @@ def proxy_to_kibana(request): except (ConnectionError, RequestConnectionError) as e: logger.error(str(e), request.user) return HttpResponse("Error: Unable to connect to Kibana {}".format(e), status=400) - - -def _convert_django_meta_to_http_headers(request_meta_dict): - """Converts django request.META dictionary into a dictionary of HTTP headers.""" - - def convert_key(key): - # converting Django's all-caps keys 
(eg. 'HTTP_RANGE') to regular HTTP header keys (eg. 'Range') - return key.replace("HTTP_", "").replace('_', '-').title() - - http_headers = { - convert_key(key): str(value).lstrip() - for key, value in request_meta_dict.items() - if key.startswith("HTTP_") or (key in ('CONTENT_LENGTH', 'CONTENT_TYPE') and value) - } - - return http_headers diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 0ef01440e4..72751d0cff 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -9,6 +9,7 @@ from seqr.models import Individual, IgvSample from seqr.utils.file_utils import file_iter, does_file_exist, is_google_bucket_file_path, run_command, get_google_project from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json +from seqr.views.utils.dataset_utils import convert_django_meta_to_http_headers from seqr.views.utils.file_utils import save_uploaded_file, load_uploaded_file from seqr.views.utils.json_to_orm_utils import get_or_create_model_from_json from seqr.views.utils.json_utils import create_json_response @@ -19,8 +20,9 @@ GS_STORAGE_ACCESS_CACHE_KEY = 'gs_storage_access_cache_entry' GS_STORAGE_URL = 'https://storage.googleapis.com' +S3_KEY = 's3' CLOUD_STORAGE_URLS = { - 's3': 'https://s3.amazonaws.com', + S3_KEY: 'https://s3.amazonaws.com', 'gs': GS_STORAGE_URL, } TIMEOUT = 300 @@ -272,6 +274,8 @@ def igv_genomes_proxy(request, cloud_host, file_path): range_header = request.META.get('HTTP_RANGE') if range_header: headers['Range'] = range_header + if cloud_host == S3_KEY: + headers.update(convert_django_meta_to_http_headers(request)) genome_response = requests.get(f'{CLOUD_STORAGE_URLS[cloud_host]}/{file_path}', headers=headers, timeout=TIMEOUT) proxy_response = HttpResponse( @@ -279,3 +283,4 @@ def igv_genomes_proxy(request, cloud_host, file_path): status=genome_response.status_code, ) return proxy_response + diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 
dd1574be78..c113e9985f 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -620,3 +620,18 @@ def load_phenotype_prioritization_data_file(file_path, user): raise ValueError(f'Multiple tools found {tool} and {row_dict["tool"]}. Only one in a file is supported.') return tool, data_by_project_sample_id + + +def convert_django_meta_to_http_headers(request): + + def convert_key(key): + # converting Django's all-caps keys (eg. 'HTTP_RANGE') to regular HTTP header keys (eg. 'Range') + return key.replace("HTTP_", "").replace('_', '-').title() + + http_headers = { + convert_key(key): str(value).lstrip() + for key, value in request.META.items() + if key.startswith("HTTP_") or (key in ('CONTENT_LENGTH', 'CONTENT_TYPE') and value) + } + + return http_headers From 132bd6eb6c54cd5bac50fb32c7e8940ad6f19213 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 22 Aug 2024 16:49:06 -0400 Subject: [PATCH 660/736] add test --- seqr/views/apis/igv_api_tests.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/igv_api_tests.py b/seqr/views/apis/igv_api_tests.py index ae4d3f90de..1688b0f199 100644 --- a/seqr/views/apis/igv_api_tests.py +++ b/seqr/views/apis/igv_api_tests.py @@ -317,10 +317,11 @@ def test_igv_genomes_proxy(self, mock_subprocess): responses.GET, 'https://s3.amazonaws.com/igv.org.genomes/foo?query=true', match_querystring=True, content_type='application/json', body=json.dumps(expected_body)) - response = self.client.get(s3_url) + response = self.client.get(s3_url, HTTP_TEST_HEADER='test/value') self.assertEqual(response.status_code, 200) self.assertDictEqual(json.loads(response.content), expected_body) self.assertIsNone(responses.calls[0].request.headers.get('Range')) + self.assertEqual(responses.calls[0].request.headers.get('Test-Header'), 'test/value') # test with range header proxy gs_url = reverse(igv_genomes_proxy, args=['gs', 'test-bucket/foo.fasta']) @@ -329,7 +330,8 @@ def 
test_igv_genomes_proxy(self, mock_subprocess): responses.GET, 'https://storage.googleapis.com/test-bucket/foo.fasta', match_querystring=True, body=expected_content) - response = self.client.get(gs_url, HTTP_RANGE='bytes=100-200') + response = self.client.get(gs_url, HTTP_RANGE='bytes=100-200', HTTP_TEST_HEADER='test/value') self.assertEqual(response.status_code, 200) self.assertEqual(response.content.decode(), expected_content) self.assertEqual(responses.calls[1].request.headers.get('Range'), 'bytes=100-200') + self.assertIsNone(responses.calls[1].request.headers.get('Test-Header')) From adcdf122f528eedbdd190ede62fdc1b404dd3aee Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 22 Aug 2024 17:35:47 -0400 Subject: [PATCH 661/736] variant lookup table has sample_type (#4289) (#4326) * variant lookup table has sample_type * project_sample_types * try hail * switch sample_type and family_guid in project_samples dict" " --- .../SNV_INDEL/lookup.ht/.README.txt.crc | Bin 12 -> 12 bytes .../SNV_INDEL/lookup.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../GRCh37/SNV_INDEL/lookup.ht/README.txt | 2 +- .../lookup.ht/globals/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../lookup.ht/globals/metadata.json.gz | Bin 303 -> 332 bytes .../lookup.ht/globals/parts/.part-0.crc | Bin 12 -> 12 bytes .../SNV_INDEL/lookup.ht/globals/parts/part-0 | Bin 63 -> 69 bytes .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../SNV_INDEL/lookup.ht/metadata.json.gz | Bin 343 -> 356 bytes .../lookup.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../SNV_INDEL/lookup.ht/rows/metadata.json.gz | Bin 606 -> 606 bytes ...-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc} | Bin ...rt-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818} | Bin .../GRCh38/MITO/lookup.ht/.README.txt.crc | Bin 12 -> 12 bytes .../MITO/lookup.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../fixtures/GRCh38/MITO/lookup.ht/README.txt | 2 +- .../lookup.ht/globals/.metadata.json.gz.crc | Bin 12 -> 
12 bytes .../MITO/lookup.ht/globals/metadata.json.gz | Bin 335 -> 368 bytes .../MITO/lookup.ht/globals/parts/.part-0.crc | Bin 16 -> 16 bytes .../MITO/lookup.ht/globals/parts/part-0 | Bin 631 -> 636 bytes .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../GRCh38/MITO/lookup.ht/metadata.json.gz | Bin 372 -> 385 bytes .../MITO/lookup.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../MITO/lookup.ht/rows/metadata.json.gz | Bin 625 -> 624 bytes ...-87a9f074-c787-4edc-81ce-94ba0daffd80.crc} | Bin ...rt-0-87a9f074-c787-4edc-81ce-94ba0daffd80} | Bin .../SNV_INDEL/lookup.ht/.README.txt.crc | Bin 12 -> 12 bytes .../SNV_INDEL/lookup.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../GRCh38/SNV_INDEL/lookup.ht/README.txt | 2 +- .../lookup.ht/globals/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../lookup.ht/globals/metadata.json.gz | Bin 335 -> 368 bytes .../lookup.ht/globals/parts/.part-0.crc | Bin 52 -> 52 bytes .../SNV_INDEL/lookup.ht/globals/parts/part-0 | Bin 5564 -> 5585 bytes .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 110 -> 0 bytes .../metadata.json.gz | Bin 184 -> 0 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 111 bytes .../metadata.json.gz | Bin 0 -> 184 bytes .../SNV_INDEL/lookup.ht/metadata.json.gz | Bin 365 -> 381 bytes .../lookup.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../SNV_INDEL/lookup.ht/rows/metadata.json.gz | Bin 619 -> 621 bytes ...0-38581d1a-27f8-452f-9678-75225dfc64ab.crc | Bin 12 -> 0 bytes ...0-7a236e4f-7c20-4944-b7d8-071d2b10a630.crc | Bin 0 -> 12 bytes ...art-0-38581d1a-27f8-452f-9678-75225dfc64ab | Bin 118 -> 0 bytes ...art-0-7a236e4f-7c20-4944-b7d8-071d2b10a630 | Bin 0 -> 123 bytes hail_search/queries/base.py | 30 ++++++------------ hail_search/queries/mito.py | 29 +++++++---------- 56 files changed, 25 insertions(+), 40 deletions(-) rename 
hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx}/index (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx}/metadata.json.gz (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/{.part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.crc => .part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc} (100%) rename hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/{part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6 => part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818} (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/index/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/index/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/index/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx}/index (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/index/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx}/metadata.json.gz (100%) rename hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/{.part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.crc => .part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.crc} (100%) rename 
hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/{part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b => part-0-87a9f074-c787-4edc-81ce-94ba0daffd80} (100%) delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/index delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-38581d1a-27f8-452f-9678-75225dfc64ab.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-38581d1a-27f8-452f-9678-75225dfc64ab create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc index 47a747d1a8b78660846186993c269f7c6fb86b70..b8eaa2d478e189846de5e8165c6d446d224fa9af 100644 GIT binary patch literal 12 
TcmYc;N@ieSU}8AI)a?ua5&r_y literal 12 TcmYc;N@ieSU}Cs`=}s5`6@UaD diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc index 6acf89fa39eb967dedb21da0678eb66654ba42e2..8d6507b0100c50e8d6ee30420d567d9a808e7ef5 100644 GIT binary patch literal 12 TcmYc;N@ieSU}DI;?y3v`5>^7^ literal 12 TcmYc;N@ieSU}DgYn#~3P5OxA# diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt index 0552dbf36b..5daea17753 100644 --- a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt +++ b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. Written with version 0.2.128-eead8100a1c1 - Created at 2024/04/03 17:08:32 \ No newline at end of file + Created at 2024/08/16 15:39:04 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/.metadata.json.gz.crc index 0be207028cc9e4a24fd5ef5a75da9f454d17c0b3..6650687d8b1d7a0980e3d48af24b54da9c0e5986 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7k)u-^y(5#s|E literal 12 TcmYc;N@ieSU}Bi_;_4y*6!Zi( diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/metadata.json.gz index e132519e5c2492bd8bd5d64d2d4c690a1d619ac8..95261c27158ff01413ca0f3071e37dd36a018226 100644 GIT binary patch literal 332 zcmV-S0ki%eiwFP!000000F_c(Yr-%T{x5ko19d2)Z|m%y1_LX5SScElql>0#Nm8LJ z{qH-m^*&+iOV0T&=hCDmnq@!&@@@ne5YvxITmT;uO6FUrNRXr$@mv7+I0@>5VOA9& zUXBE+z(dINqL5jLohiAi3zKhvyZMS`Qc18uFM2kHRmp`4TLtI&qQAggYGx|B7#!n< z?wBee=t^{G+NAW*#*R61G_=)5$nU&ZgL5b9B+8!xwJ9XKe zd0NeH_O|=7T&=)d{pYtR91Ner6GaLCl1iLOLY4C^mbnluJs8Vx6pTls$MLQ0(>k%Y z{?msu1IwD` literal 303 
zcmV+~0nq**iwFP!000000F_b6PQx$|{TE*wBq(Zyd?PfRkdPY0fhww8dlK9_PUNLh zi1P1@n=T~n1)s)mo3W=Qnw3B(c{c(Kh~?)Zt$|NTMAkcK2wi1Z@k#&>IHB#y@X^#D zUd{xnAVA1Wl9*XYo$=hYwW+qi-Ma8nSUO3bjbY6yiKS+y$)XdTvr;Mv7sVh1C!t46 zv}ei#9p!Xc7be&rC%DTN9006^J Bhu8oB diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc index f8364d7499a7a33cfd4e893ecbdd12bef28cb879..905a30feecaf6f082dfeb576d78198faa8bee189 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7-2V166`5%mK- literal 12 TcmYc;N@ieSU}9LYWlsbE6bu8a diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 index cbcaed4d4c5edb1b782c4914dfbef6f09d9e337b..05ec205c54a6ecbeb75db969956d48149ca772d3 100644 GIT binary patch literal 69 zcmXqDU|=u+VvVi(e-#X585wRcGBPl51{oL_7{(iBr!$AU1~W2nx)~V78!<33IGjDv Pq{a(W4+3C43_#@o#7_+y literal 63 zcmdO5U|>)KVvVi(e-+f;GBE65WMtqBGB7YOj5o|qcLOpE42AU0HG=j#sB~S diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/.index.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/.metadata.json.gz.crc rename to 
hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/index rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.idx/metadata.json.gz rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz index 5284af4b0bb01cff4ac7e3d2d47d6f5b15044abc..9479e06e8ac7461ed9a7d2a0cd0c5d10a481e976 100644 GIT binary patch literal 356 zcmV-q0h|6GiwFP!000000F9DSYr-%Th5t*QHps+Qn3A`lFvj*URQ6Iz$>P;!OOue? 
zPKxxuFRhJI*~4Di=G^a`d%5X}?4^R@2b`Bi6ZSZtr3*?*F4a{e(@8o>(!~P+-z}2q zlqVt~lsMRf11-Q84pd<|mI1!10YUy5;<2$1D%V0jE9AoW5bWvdk!(5)dXq=_A1iV#DHCEa%V;goROl6G* zjh?U*Ij^h#)V%7}xc*ozBeJm|`kT0{Kk{u*&GG-f9u6kU9ZC%MUIlyDIT&zRatk5UU%%_}spBP?ta+BT#O{{9c@t?e(`_b7O}0ssK= C8m#01 literal 343 zcmV-d0jT~TiwFP!000000F9DSOT#b}h5t*Q6yjJra3gQBfrviHCccbPVtThm(48!f#6F?&-w02a@v+0q=Nh#oR>xucE4PtD@sZ()kP%pSvpJ7)g1sYR>^$M z6Oj-~92~%b7T|LSDz_ZV0AJLAApZ#Q*jNb8TmRA?mHFU`?ag{|GatJuFJ*p zHgbRL>XBpg&O{#-E|_~>*?#m%<07Rc;I(3^snUfrcA*eqDr+=o^n|tKw~Jw^dDVBc zec!Ajva=wDn$O)vz6+{3{eNzN!;s|;C5Dfh3saA07z19TwH}=7u*mO|tj}@ALf~PN pa2CgzDOE_1QVUegGc-^T7TSca3u)v0@OuoS-49vHLpqlN002-HrD6a8 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc index 6d4bcebe5ae8e4504c9eba7bbe28df0adf27fc20..dcbbf269b1a067d9ce763291a813428aeb890b5c 100644 GIT binary patch literal 16 XcmYc;N@ieSU}EU{lgX%mc>7rZC0hld literal 16 XcmYc;N@ieSU}BJYd!t7nW`8gMB)A18 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz index 4cc2e1674a4e4c5988726f5d4e43824a9992f57e..5e76d5dbbaef5c6bee2b7bbe5fbd6d340eaf98c7 100644 GIT binary patch literal 606 zcmV-k0-^mMiwFP!000000Ns|~Z`v>r$Nv{UZEDgHem8HWV--?0O)%|Y6^e{~2DibE z95a=n{O>&{C6I(lqgr3eLqzV*ci*r5k>VXubPXgV6UE^P`1IG;Spuw0JQ6*^mW1~t z)@+mFJB?$AAtiH6G0lOwLK5!w9e!>T;5TIgtH6aYb1@##6lT_JnBC>=!b*r~W#!Vv zq@rf)B|4Q>YK4)BvYmG3z0AhIwJ~&izb~D8iO~2`Xv>I^=YQVRiCDAlqqv&fYdb#YB;O<@&Ti;WOq zYUgn`9tG{;=#cUVTgjLom5k89BkHRZEnILW|6)PE*X#6~%YLVp=S$CAhgN~E>nk(& zLle>wSo1L1$8sPI#6PHAVf^y)OJ44xF;J0X_fISy10keIZ6x_qam@X$hsakTJJez( zd`FHmPYaQPEtOEEG_q2xKAMqbCrLb5IWkfK%o#Wr+fr3SQ%%hW0nma`H2#jvhX0GNHZAG+OdxlqV--?0O)%|Z6^e{~0&as7 zIc6$D`R_e;O2`M3Mz!9Qi-??ae9rsYj}#w>q+1{XnMn>$z~?`|EK*=S;*jtW_9VC` zal&>PzSB5{7&5ZN6iYZTQ%HitvBQsj3jC%_U=^4UWDOOS*u+X 
zfohz`-E`9JjV7m*L)b>dyizhj1COY$RsUBN0Q`IP5lvMdZz)E5;~biQ?x)UC3d%12y*A&YQlQt}iF`oRz;Ad5`eo zroHmpWcT2-JM}I(G0jt|^jBd%{&*iSFK)*j10Q-mqwZ+j8@q#!H*!a#6?40*0q-yh zeV=*uuJACYat)JYC%IXFAovYb+88Tt7Wk)N7^nKEyjcgkNyDj06Q2SCjbBd diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6.crc rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6 b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 similarity index 100% rename from hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-9ed567ca-8929-4068-88bc-1b4d0cae37c6 rename to hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/MITO/lookup.ht/.README.txt.crc index 6def5d8db35cd6b61b41cd042d3cd17889d7b617..0ab5a311c58c535923eb7f702fe6a71268230d64 100644 GIT binary patch literal 12 TcmYc;N@ieSU}A`_meT?N5W@ml literal 12 TcmYc;N@ieSU}6Y diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/lookup.ht/globals/metadata.json.gz index 1ed50398d6345b1b042d17949f5c9f21c96f277f..9d007df8c1a1365f0219d2947ba93908da56aa52 100644 GIT binary patch literal 368 zcmV-$0gwJ4iwFP!000000F_e7Zo)7S{TE+xC1?eza%)O@s#J-f9vVep?137bMP887 zDF5Cu31MlG+Fas!^WK}qu1U@^AOU&P0yK!(`!p_r-xEp}JE%yIqasx*T!CvQojq;yfqIzx0X(g`mnX|Y>#wBEm!ZM$B%^&0fmEx$(L 
z=>9Q0r6}Q_Qi(H4s0zMAn+wv)gR%TV!el&tnEaG|7%L9af9kMjVA-{@@gELRGHqX^ z0)sD?_~33x{0rXBSiXUfRp23HCUHa!Bvw0St3u~HV0N83!A&@cpS7mtDvpGrx=e#Jy`={s1$2r*2sQ~@ zOK_ypK{(2()4&y++P?xAmC}GkvQ6pNx}bGh`#Fy795*Lu!jTdxyHS53m~Gi$Angh! za8mSZwT3~d25T#A+;$6k^a5{j^e}yjIw~OSTS(rML^5Z4j2VbF9!$g!3g)xf+9o9wqDkEy4OP^o1wZ4jF!YvVwD=2)< z2i;~#D=lJA^eciIYZM5IA`0t?QF?V#A_tP%^NRwfN(BPPiw4G-mI#1EED;s}F=P4a zZWjQ7L?Qt!;mK+Z_aoJNbo`NGee9UxfH*pGZj2|^g zwfKL=^{{pmSzpHT=H{?=2C>RFy`7)2KJ>ua4N&>oiN44e!`cbJ2&=VNJAw=!!`{mO zzU%CfK3?pnX(xdYvQ*!VUYe2$kQlQ+1YZL$2tf@`lCTv~~#;o{yvH z_GnwE;ZQ@6f@mZKNh!k`utEut$Tku{)D?vxga{|X|9|%bna>^6((3?GQ-F`ZXipLz(IN8dS@>c4oQ5%- zpG9-p^?*A!(VMOL7^R!I7aoqMvVVcIoS)Sn#@nkl&F!Rog>w(|wfUt0P<63aKgOu0 WI28>500000001bpFa00@0RRB$)IXvC literal 631 zcmV--0*L)$0ssJ?4*&otwJ-f(pAMxG0CwMBCGe2w0qRgmp-L-)e9+q=q^}0Y8b*hG zOW3B?6R*~o0C;AmZjWr1bMUjN{#3a4CC6NP3?fE5v(`4d+BLxHFOhZJ=^3~l= z03aYB(v%cSc(U4zpJ`%!77C<(^jaVNT5ZHc^1EL@zU1>&ng9C3`?B)2z^cvlQSa5X zJ7^cTcx*MOuWtVDhz%hQ*9M$6g z8P~(wJ!E|u%bT0S+CgHKZ+bgFV||Fi+7VFs+KIl%7sJ{OzzD0gSi6D@zrxU2RlypcK+sj9!I=r*l{ z*j|XWb`Uw|`~7- zJ%al%eOtVRe^Yybez)_gnTRy6-Xm>H{{Ky#ro<2E?{(eGl}sNLAP5U`Fix7tx{0m@ zlDl4nI=um;ZX=ZVEwdiddD5T5Gbqb>^O2e4W&!UQj>jNUU zt2aZ_8?{Z`c3vJ&m4AV=+^5ywj0>w)+Uuk|!g&sKwfUm}sOn;ievCWCOV!_v9}NHi R0000004TLD{U87V002*)IphEU diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/.index.crc b/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/.index.crc rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/.metadata.json.gz.crc 
b/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/index b/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/index rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/index diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/index/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/lookup.ht/metadata.json.gz index 67e671d964a463e2e136a0c45dd9399e6ed354a4..60381e4539e1d710e062e3f4ca942425251a64f2 100644 GIT binary patch literal 385 zcmV-{0e=1;iwFP!000000F6>hYr`-M{x3dl;FUgLE53~u#@G&{ExUveJV$N3IkB;o zl~D5Er=(6xOAou`@x3HVKCrDeko|%W+Ez?FKFy|c&I+l`8D!(pbTpaH9{^-NpNz+H zq9%+p4_ok1DF~SdlR1e6iO(oWP<~N7HV%l{bU%lkv0u`NHTqiN!Fl@)3bU;&wb2m7 zQY%b0wKI^AL{~e0e5L%^d)Y)VO?mBdiDXypWs@dP0LNUFrfsk@<_rno-M!a{=yO-6Xn9c78ow(@vTpA?~+)2?z(d=Sh 
zLA+KtoALq>ymdx~QmbpumbaOfx^8Lu%9hSnP+?%=z>b6OKPy>w3t4?G7lUN&K=nI4 zEppJegxdZ8eZ81WPI}ZxHz-rKjy=SHkAYhB%yrnHPiASCL+1z5!$ragjGjwXt^?{y fp=sWrft(4NFSISCMSp)6du#g#whm(91Oos7xm~m^ literal 372 zcmV-)0gL`0iwFP!000000F9DSOT#b}h5t*Q6yjK0(2_UVKtvy86JJItS$eld(4Wkq2Nns?)lEi?M*telM3=ruukflu-m&NPAMt5RHq`D&*E7Wr#Aq+NTd0j zMGj0@(XB&{lXfKfs4$q^@yhhp#~PEEmVjWjQQTEh zTxz=dUGZ7weHg9KmC-fS=m_h`_f&_^npgd_S8vP3gscsS;lhr$H1R%E_2K`%0S+d^ zElLc}BNw_Edl&;=CTTG!mthmXNwPjC{67R9&JvEr(Q{4}(xI#cs`d$5C1VD(=XBVk?P0ssKsY_iS( diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/.metadata.json.gz.crc index 3e5f0b7d529059a734d9c40bb99cebe504e824c7..498d4f224fc7672022f5d7f74d802686d3a32f02 100644 GIT binary patch literal 16 XcmYc;N@ieSU}6Yb>Nol5f_ZlVB|io8 literal 16 XcmYc;N@ieSU}E4dmk2ytzk?qD9$ExB diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/metadata.json.gz index cf47dc3361f40853639eba114c4e30e40d2dcf38..b54d201997931940d1ec0f4bb3824945fdbbe0a1 100644 GIT binary patch delta 611 zcmV-p0-XKv1n>lq7=Q62Eg^4(u?nf1CaCtX3Pr}gfZO0ij+x4k|M#Ag5|RK@snnP9 z0Oam`_x(DbDBcoDS3m+XmK>gdPkw)!rocMHA>kuzNN`W$gsn5Yrg020WMqaZmT+LU zkObRZhr3M*d{`#13Ty~7Uq%C(!Bnd?)7!jVPz$j{YO$~}A%CgbIBAjzEfR^5vDDp; zab9L);94oVx!;w}oKz(bz;u{ts;l|pw>m{Z6lQdl+CD{JX9*fqlEyS!33iZ7Rwq(s zHZ#`W-xq&1XSeqI)6HbK?E0Te;e{M56mroNUF6D&rq=&SJ*~7RbiR%vP>uU_I~ue* z{$QVS2wRGnSAR+dXz39Z*NWygI2He};Og?Sdv&<%Po{ak^vrc=9_XsM(%2805SGw} zhw(0!1FInZMfDQn7Z+diavP0-Mmd(RVzo+4BvhtD!Ji5cuRI4Ii5i6aTFeCR$hmQn zD9S*WN~n|x(o?Li%rNFCn|mu2BQ@l#fpbAFRUK$*sDJq&0OBJkY5Wtpc^gpqJO%PN zLOHQvBw`2~H~SF8BJ%Fg6=M{zMDgRI&g8J%ff{><&YQlQuFog+n3WSoJ|euhX>WX6 z<$JK-oq88TZ1a>V{ax6@0iGA^h1+ud9_>f1p69Zj-*Y|S%=Oz0+`c!bEl#6|`>p&e z@i3us0bGM*ExBF4CHPiU+8QfP9{8tV7^kwO`hjwG43og~I(@T0o8b5x%wH3{;RGaq xWtX5o3)|t(6^Q%rlhck`Pu2006pHEZ_hD delta 612 zcmV-q0-OEt1n~rr7=Mi<{Cq2nRY=`5LA8ffC^Gg1+y*Cd%v6T_zwexskOY`YrM{F0 
zAb01x@7MW6@s3Ek1~MQM$>9n3^w-x}3d|!82_Iog2KOXR*e1hk8pjYrM&_7e2?u5i z$zZqdaJNl?kIDpAfem5i(|AZTm}#|PdY88w)IuzgS}bi$NPnufPMTywi$r2%BDL=s z=Xo{;u9c$O`+e!$NmcRyOoy4Kx|%P3t5Xz2VMf=f?Nju1R-i#8X-u=VU`NSheI{k* zb7TGedG=Rxc5A;s+)PI+zxS~eUdX{hA(u_jMXs!9YW<(o^Ga(%7n>*o)wo}`<6+C| z4G$@Yu$72;rGI3ImL5@Yt!QC`Gw~M-I_z~#;W{)AblqHO><3K$R${8c?5nkN1SH7+C zJvi)6y^A5Xc}kW3ChXw=&kOd#{o*cs+6z41aXseyZm{q@x8LdoZnxiUvmogAX={<6 zB_5_!E?r@mY$UhqcLd*xN?T*a$pils4C7R`Qr}U|o?$Ww0NMgZXQMH=Kav yFYM9^x;?+%<_-%+c^e^Fyy3`ot_p2K85`RPHe#$1s<}NNF_P| diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/.part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.crc b/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/.part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.crc similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/.part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b.crc rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/.part-0-87a9f074-c787-4edc-81ce-94ba0daffd80.crc diff --git a/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b b/hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80 similarity index 100% rename from hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/part-0-b3a842d6-2c33-4b32-9184-7975c4499a1b rename to hail_search/fixtures/GRCh38/MITO/lookup.ht/rows/parts/part-0-87a9f074-c787-4edc-81ce-94ba0daffd80 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/.README.txt.crc index ac18ce1aac64a0e2ac8acce3465c69bde8cae411..92285ec48d482bce1bb9179da95e403691dddb3b 100644 GIT binary patch literal 12 TcmYc;N@ieSU}BgO+`9n)5|aa8 literal 12 TcmYc;N@ieSU}Crwacm9%6Ziwi diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/.metadata.json.gz.crc index 
a59974a85c71072216238679c45d6826f03e0236..fc9edcf61dba073e4468802289cf9679a69c55ad 100644 GIT binary patch literal 12 TcmYc;N@ieSU}E^uIVBJP6m$cj literal 12 TcmYc;N@ieSU}Er0{nP>g5<3H5 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/README.txt index f201e7b745..5b8a155f43 100644 --- a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/README.txt +++ b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. Written with version 0.2.128-eead8100a1c1 - Created at 2024/04/03 17:00:55 \ No newline at end of file + Created at 2024/08/16 15:40:56 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/globals/.metadata.json.gz.crc index d2b845640ae4b4a47caa8deedfec45f223ccee8b..06fd46b58c8cabd7e3ea6aa6a6acb3eb4a216d6f 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7k7)bs}c5VQhl literal 12 TcmYc;N@ieSU}AW%X7y$O6%7O> diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/globals/metadata.json.gz index 1ed50398d6345b1b042d17949f5c9f21c96f277f..9d007df8c1a1365f0219d2947ba93908da56aa52 100644 GIT binary patch literal 368 zcmV-$0gwJ4iwFP!000000F_e7Zo)7S{TE+xC1?eza%)O@s#J-f9vVep?137bMP887 zDF5Cu31MlG+Fas!^WK}qu1U@^AOU&P0yK!(`!p_r-xEp}JE%yIqasx*T!CvQojq;yfqIzx0X(g`mnX|Y>#wBEm!ZM$B%^&0fmEx$(L z=>9Q0r6}Q_Qi(H4s0zMAn+wv)gR%TV!el&tnEaG|7%L9af9kMjVA-{@@gELRGHqX^ z0)sD?_~33x{0rXBSiXUfRp23HCUHa!Bvw0St3u~HV0N83!A&@cpS7mtDvpGrx=e#Jy`={s1$2r*2sQ~@ zOK_ypK{(2()4&y++P?xAmC}GkvQ6pNx}bGh`#Fy795*Lu!jTdxyHS53m~Gi$Angh! 
za8mSZwT3~d25T#A+;$6k^a5{j^e}yjIw~OSTS(rML^5Z4j2VbF9!$g!3g)xfHq)$ literal 52 zcmV-40L%Yla$^7h00IEL&Oz|Hr-fv3(*uLdj?;4-ySVT-%GaqL2U6>pmtH(r`z>Q1IH%T&PS+;_5ObZX&pW?+DLICr6a()x`~vG}H% zjd4mUR;tC^ySzI9W^MgR!8C*#k(8ZqR83W6m~ehu?Ve0Wg42=q|4{ zxclFGP9aQFWOr=OX+?z=d^hvwbfW62k?W4mX@y*;jrA0rQ;JA4)pyJ}t#BqCF;G>0 zG3WGwZLGAGb1GrURV#OSu=AQzi(D^o?4Gz9Ix@M(jiu@)auPbNU=li|h%}a}vu%VO z>k>M3=m=CH?Cj~F(~6Bcai%K0(?Aa!bV~6WbV^}Dl+S5}Ms=pwQFC?kD?XP(SndwWhToc+{E^XF7T!C1+K z%7B1S3bKJJ3LZ?tK73=&w1nX?fKZhq5C7Dx@02b03L!ZD{8M!?9NIu@Lgzt6O*o!tEGF{cuINxsSWnMBJw$DBs+$9`jppm&Wqg}{E& zkrUa^`1vOBJb02K&9B9rPFPDKojH9VC99E{Oe#^Tc;8f(S*PGzPu=z0avK~W;gk|G zjbR{*@zuyEuS=@vs#BMlPC;qiR`;Hi?8Rjxo2%Yi871vg)O^w@Cc>uVT^2Xl+jt@u zL}wS6{FUInuf6)gw${50enWux1^KGhL9>(Z&f@|!x1~(&` zJ_7>cp}0Ez#pOxJl#Ow!*s4>@E>p>cS!XW;wvic`9qk-h$0TF^ z)7{Tg?LXj?Zei`o##mLSOSPVJieW6HrgL?9t4aGpk%;Ru5Q(@rM^lbKO*{6TIeQ`( zj0hp(exCE~D{V~9geU)tuFeGo7ZgyS9sAf*M4BSe-yHpUQYfW$^ndkpw(dN3I9Rb0|Ou$UhhBkH=P8gS(}K(P;#Xm0Y+AnQYN6(JhK`ilW$@ zPQB-tip-r)QJ~~8EZyIDZ{*uNlhLU})b(cY85kfwLx&1}V@vnJ62|L0UuZSV_3p?NMF(yMrqTZvW%U|D$uF6Fs9;U-eYU2 zt0VUA2s}@O`8T7}397}KslP1CKh<9SfD*=c*J8}FD5gNEWR*_E3awJE(%hUFyO}(T z(WykeI?{ODM2|ZqtBhOjmQzb*Qo*Y@WtG^}oNGP!5YRB1T(P6!J3ul#1d@P}l+me#ITv4+r7lakOg(rN)uQ4QbdOO``kQaL1Ax&r zrrKP<=+9Nw8hlD)6l{y47RxxrFjo{~;{xYX2q2+T2#l3nFjn%@zSE=G2%}xT)>G8- zDb<{dQ+6&(wS_5{adSRclI!o?yvyix!ZcM#N6bH885xj=a#wQ?TKJk&vaYF^2%UP) zUBS01i*0VW*qN#n;Rjnb*+M}E1_+CFG;LhLH@Ox-N1RDxu(iX3af??)PUYRHoGVqm z*qoYcGU_eHuw}|9-v+ijiMY@~8e6PomE?=;TUvUq3&N-QX7L#D;RWo!QO_==hUJx$!~YR zA-w;&P<w_ar{hG1#&O~|Xo>L1fLXLhfxhj#C z+`SP~LIX*x0u-rAzDK`NdrmLxC|WAy>eyfGIeic{)ndm+ zo>L0v&nbjBol^?XeHb{M(}{ZDbWSbUbO8=BAk3Mr9VF;Z%CS?`b~DI)xm!?WUaF1# z2P0b_qUz5Po~n+N`aF?ckge79opyy@z8GOcPY>UP#U7->n~N6AEugI?($eFqovQrO zSp3lei5YSX?!|sW7OPiB8XHSw+CPba51uzBHQ!c*_-^KdbS|Tv098| z9Ad=?D?%PXazqUuTN`RmjOk8stN>niU<3t`<8DcADzc z4X(ydeUA_z5!TaB2%(5iq^BI&-%phapfKUdPdER5>PHA64#-+;iXJR|#vl+vHlC+m zz1qS}^s7lk_=)UhkSzu)K=9spYOa49?N9)ax6Xb!H$a#}9E*Kq%NP6UYdL$kqC0!} 
zVS+AlWVsqx+RuO`c|ar}KAky;W4W)2_m1dy=YaNRdlATH046VV^Ie{`SmBubt6OXm zk=fsjB_mA7@uxB2Gr z0M0sASLiGAxYpl|`^%lMzD~xUtHYSBTKa9Vjzl?f9_L@v0TLBbC>?rWYk6CLpStl& zMu3Yg2Gu+Lk~O65OAG|?7W1}1xv}_R0V>u~u(tay&mx2%x-Z6@NgnUP08Aq9j{pyz zYH`MoebzYaJ9%%==>wBN3={S)i#hw&;R4&39t>6Hb;SI&RoWUln*5pxkTB)H8j$;3&i|htC&V?4Pvo)_&iKLqLSZ^6xug(gMc>T`b9?$@qDyeLVRY zpVLPM28cI+1s^g7=8_K*H{<6@SbFxL)5l(N-3@EbDTU>(GQa?XlCDH8H)92g?Bf}R zQXjb2`iwQU^U{?I#!4=7v^6Z&`Bne|*9S{}*~?tc>BMiy*bsyHsKWY?V@#qM84-~r zNzwsA3K9e*fs&L|LLU+!@lX)uI0m8^#u#G=A%qY^2q8p3ghoIl5fPCO!&-4|T1=6f zj>eOZjIYcxS^2J15v!IdH2jcnIx9w|p2Q2hN2C5HhY;!xl+7itcdI&c9QIr(^MLCE zLXBl)_xGl1CE^64tD(AT*|_g<7%#wWzFc3`;kUkUvEUHWxq~fq-cE z8R$QOyz=BCN*ARJ1?vDpB0CbtHb7he%dbEVe`YWD0|LZ8gDl1R1RH3rMG0_2Mp$y% zs!tG7^*K_kt%$ujEi8iTC3o0etXyXHP^2V5DLIz|BpS60~aiWKN&U+%xT{m8aRS%b|FTas{AEeN2h;wJ&*Z+|SPF+e$ASoHyB07&T6 z)T+5xt~s5=>5&m=;xX*XamP5oxvL9(!mu3X$3&0cOr3k!crhI(RkYaYoU~@Hb*5dA z9;WHzHqVCG6Jmh?1e#qeIS%M^ctAxcg-_4sD43r|5EtOMu+AtP(f?Sl=E~9`>jL|O zNT1I5vTi%;x$Gpm-~*dO&xWp>6_*VMd}ISJ3I#iEJYYWi`_2Pl3XOBmQau<($M~Gr z=fjo?NX0CBmjh*|*$0r!jR3!-qsnB0p5y?(PUuWYb5wwym$R(gah{Cu3D|iK!C7)B zOjO&&DsZz&4z9bd3KjF?z}gW(q`n`eR2F=59}q8JnwGTa+*Fxb&iud~(;9hzWp>V* z_|>(v+-{r0My0X}Y-sK-}fG$QA%W*8gLJ>q>$7YSV9Ik0Z>etXPQW zanNPqKOdZodNSQ$&e^KLIJ~Xe3Me6z&7V!SiS)@1TmG}s#YPTO6}n+AGV{#H#ghqo zjH}F^ncWNWa;>o$;$)nR(2pD7=wsHhKFv0^vT=qyb16R%FTMwnOhF4j3&bpUJ;i55 ze3leK*DCa4%0k+q9TXW_Uhq%G&8zQ_(o(dIN3EEmbiOHs((~n0oUMTxpD#R{8K_8S z4Js=MZVL1-q#UfGfKEV)@Db7Une7ZBqtuS03zmQow1PwN48eUwK+B<^^GOMd_MR(m z)P_2tmCYPR(l^d^ns)BDt6C&x&0FHv6)uV1ppkbiN^_(Hp05lBgP?$gkwe<(Wei+6 zaZhXVF32O?h>F;wxFBx>aatT`SCJ&BS=!+dPOA~~Kxy{ zZ^Sr4J&>&}Ix1DqX#SqKiy;AW8Z<&7A9Xi4<^)T+(NKZ|89)@oATMyTSoMO^-}8vm z47lT>h(3wWy{}62&KBW-le*PpoKIZ=_}>Q|=nSt&{(K)rexq#x}6G`K4z z;gF)jsv5a5QxSy+vx$Cm2WM~|H7ukPLL$Y!kJ$K&wvM}{c48Im91#i<`*ql1E~zJO z*7Zw9Oa`Vo$?|fP(z|75JnDF1Jdb$xWB}kzI^GP$dY+d z%#Y;fCmThmS^Xx}3@DV{&bA_l%_|FzViG{(lm}Hvq5;TvzA=d>FkyzYKT8>+Ji_`| z(hZ3lj8SeaB%V-yq&TF@Q#wt%utKDJDe^zF{BGJOv+j@D~)67--S2WU^ 
z4_aRlQ;>_|8s1(M&nJH4)^c1zN_mo)OE@x}!(|k$Qle(iI;C6#A`Y9#&8w$#ER9qJ zdU_Wp3^6{W8x09<2o3=WG!@PX?R8ju zcGIlzmrDQiKt?gj2W;^7=o^Zw*bEC#QMCuw)*O(;F!?1?Xp)sN>m(!*O)PlyGRC`J z36^|)l(7{$gXGA7$onR-B;kZpj7A9gJiG}Ji{TG(@(D*`XbbT=D2^3mh;;id6eLEC zeOT%oRyp%f+O2`y$jkjUIT)*N8ssfNc3p8!6>ET0?fO*@HmCQYx4-x?8^d6&wR3?+ z0)yk-X-Mi2Ko1W~{|BRv{pW(G5)LxR#wO8;hJa+N zjgPilq)^F+D%|=Mt!P+Faj^ILDCs^{+%0PsvPdbNQNSY_X(8{ps~94h1Qb`T&1WVD fY@o{Mrf>6DEbamg000000000ewJ-f300961FK(E+ literal 5564 zcmV;t6+`N$6#xK#n*abPwJ-f(f11s#0D1=*FGApoN}2~iu@Mr=g9&p8gvMY^fBgSO zs@hs3*34*PBCM08WkL3hB1ltaAJFe> z#yBMvE7fA|UEZaDSzBKM`Q@Nf2rzT;JQ*(7h7B1U9yWw%`b^ubN8+2;Hn6pX!@RKJGK!XQ`?(&+0 zyZ^oC6v8w`cE|RdT2yGkcQb!ZDXOj+x$fwkTF7Mgsx!TgnyZ^%@j12F5^Bi=pcRDydo^V&+k z8_8=Z-w^2=Rxb+)5{IAAxDpGe$?8s(d2dN`p|iVm6!#zrm_ z#zTZs4-QmK;9wCJ;u~|Or3|mZgQ^^X_@`!lr)^;e(sW}%YP zDXOvP)VfSlijA$-<;pS+2L-!f-~kOdV69*4fdv>?pnYS{+}+)S0}C`T?K^#Qcltn+ zv3|Kbbzp%87zj=++&$eZnHtx}X1t|yTJb!Q)YyV$bWSTks`ESJj0a1g1Qu4XRoeJ@ z&pDOg+DKarm54}Of6i&6gwsG%LXPrB=bTp1k>l8{vE!Ux*pOY$X@xWCE6IuH<|;u7 z?wnFcJJQqD@6KssfttrzwAq|iXcCxgPAzZ;xkA5Wb2?#0kWnYBK&EG;s5!N8b(KsB z5jLk34_>G)k>X%;ia}DW*PL2#t~sq}l4qo7+WAUSWdE(^bV74#!6I%lrx82~B^+~# zp#+S~X=A||`TI<}TFK4d9&;+eSLBCEW^DOruoWKxMz#rvkR%sK_%dg`v{mfK(n38$2hX$%8djITyU zd0kRPSDm`dbP7uAwz~JEWG^lo*jx@;8F_ zzV_+|+gk57cn%scu3SsX_w1cjwI&tC7{w{(HK`PbjDd=iz-}ZTJ<)!Kd5_@M^TzxD z9v%*NtIaysauO<7m#X4&PBF%~QtqNG(t>EB&$p?1c^IS939hl%99)cOdW?q%hvMq= z7ndg?Q#Qt_VyjLqyG$h)W}Q6^*g|Gxk#7$9>Fzq!wE+&{jJJzu$h%`Z>zd8CzSIM!MGkZCF2jxch7Rkibtn%efH5s^` zYBNsh3ZuZZ?-XTp3SqgsI?~04khuA2E~Z?ut`X_pj;%BYcS{MQQwSIvxo{OS*`i&dTNL9IMX@=Zde1QxnLD4N zz{q1*y1((B<~>cOk178R$UdyIn8-+arZ07lQ4YI6ajKUZ06 z@F|T^uq}#OEaMczTv3dT3!G08fP_vVFg9|**vL=&PL5_HjCT22Pf^RKRC6v)*|{v$ z7N%Ur&H3O+uD^HlE~C>4(^MfHG5>sFU_2bkUCljc;cHUKx~5_xbm}>G1>dSHw#ngQ zXR11sJ9ry zmMNor8`$nd;zIXmY_XPMDy%Ci$5g2-bf=iNq*0C2^nnoHnG3u>gOJ zhluM-Rx4+dOH~THQaNW%N$E`0Xr+sf0Zqn^t#@)RI=ui%k2Cbnep+Zf zxPwj`vq7hg44g4UG08W>G3eAn-M?<6HU^zqFz6J6y$zesX@$ljzuo4T`L7CRR5oK`r0P9enU 
zoKk@9!@%jBQq=pVb6SB-7Yi_Fx_*$LJ1NIbRol%V^W|P!&`jlXk*+&kG{?n{j-phg|tVAW;H7|7F{iN8tgRHsU2L6pZXplJR+>8 zpYTx;pGZ$RvcI1y4M1VSlAmt={nU>TJ{pg;*c3fj`ix273Btx>uKGxmss2-y=xIJa z=o*uo>xjqqLf_oJn#6tP9(YWU{ivZvqP+Dh84uZCY&m1!9+T&(SFg5k6a8uv5q=`O z8Dxt=3lF>}PtEmjqa6m|@z&WdCx-{~M`N*%Z24k8eJy7XS9E7DLrl;`hAcM&OZyS9 zB96zz!>2R%Xe{?t@!k;q?iA48Y!3q2GGOvTH{a!1ix!T_zq-XX0ojd3ipHR~s+IbT zMGR|6EJYK@X|VGNpy2E|7BcB-KdQDa_^FY1OGE^bs=TdJzRfp}18~-{x$F=@W z?k{)3`Z^hZt`1|iYU#Jd8WQEmd7OVu1x!>(p>zm>t>ta~ed^?wi~tu~461kfC2L6A zml_1%E#_^3a%1sB3@p}Cu(tay&mw#vx-Z6@MI7%z07xM3PXG^|YH`MoebzkeD|v6w zsRNV23>5Y*i#hxD0Rx*%4~8o9I%59XD(ws%Nq)@&NSN~949N2Hf_-hIrK=@K-{v1S za5UnV!{>`F_D@#u)_z}!BS3`3^6x8Q(t^eWU95(Hqao0l5RvTH)92gtm9b+r9N=4^%-Yu=cOwb zjE!96XlGcg^Q}Mxt`CmQ;O$+!NC!O^r*u6kYh}u85t3gBuUZ%G71s|C5e)h zR6-vTAn{O;BoPsj4|wts@RjM2mG8Jm8CfkYO2<571P_ir5y>RWDr?94@;Y z#`AFVE!S3c2#qfsMT10@BO$3a=-frt%>@=mU?Uo?43wWha(Oa}(zuqP03LvsWkTYp z0vrw?>=o$6pE1h4On~CfB9>x(Y6n{DdjdL>Evm>u^@-r9`UxpEC}fT&mr~%mk^$_Z zac=#0Y$wc?KC|AC5{a@>$upj7kis#miw~oJ6w`Ewkm7_)db@F##(KH2Oo!!sw2$$JERY%}(nT1Rmy)2Xy=F8!fUcSd zZB1vrE2SzI5Cr4fGU58(bvHx~b6_CZvfSWzbbif2&*^iPvic(Efdb>7ot|8@K_k`+S>(9KA}d)6xWkHt^agy@ zi8Xiv14HnK%FRHsxa1$AU|Mr1m#*|L3#-;kBuKp~l`Z^dyRwRwSEN8MyYm0d`$w`R z${H+u*sg@D+JXRTDt-bWfAwSGj6q8J!m2Su0s!Ksre4pz;+oS*upY?)O+1HvIWEKp z^6u)|PWYw6w3z78o2mK>TP~&}Lw+sB4F^q_YaM78q@2k3NcwXjE<+?$0D(3ZOTMEy zcmtqb&_<`%GQzfm6m%PQ97sne5YhfvFLNd7taS(bgG-;<__EGB>qT~uqt3(f(6glL zvd87j03EsVMYX|>gA166{e7pCaAxD&Lst)p(Tsb}tM{>Lrm;pJ2-cdk!H>I8r^Q*oAh+8NdHSOp$7NyBy0Rl#Di99Sbo z5P9TBlgh#mt_9+GOJ|8hI(Jd#;WK4G!NiWdnKD6Ua)8=Ag^9%_a|Xs~l5HhVIca+J zUAAuS01$WiEwTkbkoEuA;JQ+9#%*GcjAz6?m30pB^c=bf{HK8?BAHD0o^!TpFb;jI zwt`CtbK^&oEqMB5RV_bB=`|yVsS4dN7nylxmD!L+Xx?SGLB>@O2PsAsMFQvB z5RCx=0tjOK$Pg6p0ziV42sMPZs>DiU^bV+_P1Q&elW;Xx?h+~ygwUBfwI$I;>$duS{f2;^Oz5Qsfvyz zK=Olr<1)c5R|7Rh)1qNp97R2>uSvo`&pA1m&&2D>DV=d#GyP(yV`pbK7ACzr z!AFHelxjU7qR`!P`bdJ&fm4(*-L%p4Q=-ORCgBK@okW?=*W?a!=k!yg>hU8g_q~gC z{9@7{wfhb=Tuw?*xcV7U`C1gE5Ky4rAL$3nf0|xTNz|pNkc39g&s0Q-JA*_&wZj>j 
zM-2)2w2(+~?;|$sL%8Bvq#OZPqps)716(y3^W1vQ*)g4RNzYV=PBo&P1C+T}~ni7d^NV!la!F4-uBnpJI5&48iowr?xa z*u1jhC?;_lr!1&KN)5p71s#)k$zwAl|14FA^62YhNiZaCFh;qxlXyb;k?@c%PwzDC zunLh5tc-Bzd&Fb7iF(TXe?S)9c&10qMq=;R)-=&&LM9r5k?mK6jxQEq^7anZNrMu! z?&3if)pik16;o z7>y9|d3YN#7Q>&#;}ed=&=%r#P!cQ15b5?^C`gPN`>@nGta9d|)LR4Fk(VQEawJyY zG|XFo=(^&ZG}ZvA=Jl%|1A7Q5+EXc~vG9TQ3IWM)8^30^NY#@MRTcHgSkbUX;$YAH zD5XADiCb0{vPi``qZmfC(n8*GS2Bc}1lHHDIlH)(qKDRMFm@D}bp#Cn00000001bp KFa00@0RR93v4Kkf diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.index.crc deleted file mode 100644 index cfd3665d8ceabc092c98b6f5c6fe4bd7b52304da..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}BiCaMf!76XFB` diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/.metadata.json.gz.crc deleted file mode 100644 index 3bc8112b6871aabbd3633665f898363eeb350f6a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}6Zmsl6Kj5@Z8R diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/index deleted file mode 100644 index 3b6113f78c85dfd6b45c84ec875c1077703f8d14..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 110 zcmZ=`U|_HVVvVi(e--S`GBSK&VqjolNzN!TT;$Kh?C9*w=*Vz9kcr6=$PjP*ctM+q r(VfxJxNZAIAS;B?-IrN}g{kS$1Oo-2DZ((5gl94^Ffsy7LNy2go#`1X diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/metadata.json.gz 
b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-38581d1a-27f8-452f-9678-75225dfc64ab.idx/metadata.json.gz deleted file mode 100644 index 2b31e5a9da3dd6974a7cd3d6ab3f0047d619cd9b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 184 zcmV;p07w5HiwFP!0000009B5`3c@f9hTkP8g&u~EIeQa94+@Hk7x7SQwiZh(Z3dRH zcQ^CqB?SI2-=7{D>+p&(+2BD|RZ-Rg9#MI1tZz3;Q-~nTVp$0=Xj?-dX98ZBFZqoi z;Y_j9)VrX&B=_T-Y)@-5uctyehYpim#i;U)K{nn85TLvdq=+m3C1d7ym=iY&u>Ogk mqawq=Z|5|IK!>JD$Zsr|QN-3qN607rFnj?OTpFOy0000svrTUR diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..7013243ff9fd1b59dcb832d14eddcd98d1c6b955 GIT binary patch literal 12 TcmYc;N@ieSU}Bhf^4Jdm6r%)F literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..359650e81674052c8db801f7ef6696b142f31b71 GIT binary patch literal 12 TcmYc;N@ieSU}AVWt7<<06qN(> literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..2196f12697914ab3e1fb7705a6b30e48c677e29d GIT binary patch literal 111 zcmZ=~U|_HVVvVi(e--SmGBSK&VqjolNzN!TT;$Kh?C9*w=*Vz9kcr6=$dGOPctM+q s(VfxJzHR$OAS;B?J%*Wu$$#OD_A>?wKx2eqMhVYkU|?hf8ii^S0NR=wC;$Ke literal 0 HcmV?d00001 diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/index/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..521ca22d19aaad9700d820d725fb93cc43d0d61f GIT binary patch literal 184 zcmV;p07w5HiwFP!0000009B5`3c@fDME_+^3ON+5wdN**9uyQ6FXADs+a?$iNw#1p z{dc$CybJ>~`(}G+jKv#OntTA6XRAVhM$>8nITP^0bWLvr z2^R`oO}+JX7v*8p$?m+-^KvSbwP-QQbqFfmxyZ!F00NYAo)q!qKV;0@9#iB+0meP? mb5f+a@bjDo@9Eey5&4bff=R^Gsw3o+ei**=>PSz|0002eL{k6& literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/metadata.json.gz index 5a194f66847ba175afe52c6732e2aaf0a17b7d27..50f4a7346adcfd7e8d0f7dfb335c2ad21ecf6d74 100644 GIT binary patch delta 366 zcmV-!0g?Xg0{sGz7=Jg>k~i5vL?2`uzLZk3^lpo$Nl0!TW%R$Bc4^1N2VdIe-0z&+ z-sD7fQbF+z&P$^Sdw87BQc4b7s*6abli4JhrFQ_lOq1!9Cn6z~IM{&$Ex;EJRAD(D z0(?;ef_xj|v9S;;n{IL1E3@T_tG_q@ zC+Bz*#XP6AEjb3m8Ze7tf-B_#Su_sW`&yP!Yx7a{yt1)3&(7I@ZJW3B3mTRwBWh2> z2B5&_)S2V(3w?#Xb0PK>t#Q*>R delta 350 zcmV-k0ipi=0__5j7=Js_k~i5vL?2`mUq&fedbdW?CM36xQu5zTYrENj;7e)G`OfL( zCN0@Z1=$x^Cv`>G-Tfj?C@Hv9XOYZj@hpmy8vvds(R|J$5fMr(?7>1Mz-1OxW;hlB zKC1yiz6M@zQkBrwSJ_EdVbST}~CBrrw7|3SAmqL4}U6mi+9n zf30}g?RNdXT33$9#(?OL=X53`?}Mt3|IhVs2pMisVz>;M(Dg9GQ1Cih%ig&x8~N>R w+Vxm%An-70IEh1MO6Ag_tOTm&85+n53)=+mL)(z=uT>xQ-+IZz56=Ps0DL&G5&!@I diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc index cefb7d4b9de03afa17451d74601f862b9d68b5d0..b87073cd48685faf47474df6d566664ca194d234 100644 GIT binary patch literal 16 XcmYc;N@ieSU}9)d(oL=m`Tht1ASVSE literal 16 XcmYc;N@ieSU}BJZ?EQATnZgPHBsv8p diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/metadata.json.gz 
b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/metadata.json.gz index 604320d847c286c9ab4b7f8216a0345b556cc3a9..a63aaa6698f0300b757e83fa52c499ca5e2ee4f7 100644 GIT binary patch literal 621 zcmV-z0+Rh7iwFP!000000Nqy6ZrU&u{TDxNTGAyU1e&+PScOzg6I6Rxg(4H*fZO0i zj+x3({(aX@2_#`ssnnP90OZ``d(W|t6z_?k8z3Q>3I@->X1~78bBH}+k?09_Bzz!g z7HBzuybG%YGM%nWY!5L#seH?Ukke zpb4(?*YYquU?z?d(w|hV5x=_nYUmnGfhHz&uS()+AQCPM6>;`l9dqDY82CoSj$EvS z@0jA!%95l2RV$%FWSEs|^~sE~Je$O$l@lY4fN2Al#ngl{p!ctfPSzYdqBNj0DfW*V$ZStYu~q*12(jsfy=z5 z>(HR*n47}mjEWVEvaMix{hr`EP%3S#x>w+zD#JK6UP~9{95PHo*Kz%zJYGF;{>^xd zlpBI)PC&3%HoA8Et{V(nz3!27YdUcer(1^UHrmiU)YwE1(MYN7P^F}vRhIt%rfX%l HItc&(-oPjy literal 619 zcmV-x0+jt9iwFP!000000NqwyZ`v>v{V#snw4`eS32EMdu?nf0CaCtX3PmQq0k^?H zj+x4k|NE|!67s>MQmHTH0m!+>_nu=PDLxQEH$Xx%6%3w%&3=8GZy|PxMWQFzlkkD0 zS-i{fnWibEkdph zHK;C@ZQ7V#EVME7=fzON%ANWCbUPcbd%@>QX*nN>T&&tE%UZ=~i|b133Twz(?vey# z>p1QvBgYL!r;3Ut$6sm%vX za1+3mhv^Y3@+cwwN!1$htE;aC-JmJZq)6SXqF4_^!ey=^&YsI-_WKqlzLBvL7c1dA z##mcflH{N&B~*wEvr?|E&CvR65>HlYMj8P#1}=?lrK+Q;rRIYKP=ORQ{eii8TcC=0 z62$ok6^V@!o~0WC&lARZIVGgSuXk@SJyzRT~`tqcnv+^G!A2GbR zX>WWt6?<^non{wJO!F<3>N_{@e!LEt2m9LY1zzAVhuZES3GBY-Ciaa#2<(C9x*khn zzfYINRpD_)#R^8*PB63nK=2(Xl`&RcEbvd6VVoMTrGs*g8784~?eu+px_jXK;dqRc zA;B{zAlNG#-8i1(_(7}JJ#ub~PF%$4j$yhBZD<}UY+{BOL>oI)DQRYD{STD=4M#f( F004Q(D3AaE diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-38581d1a-27f8-452f-9678-75225dfc64ab.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-38581d1a-27f8-452f-9678-75225dfc64ab.crc deleted file mode 100644 index b9ef33653c510e8296c5690159dac303b4f698d4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(Kzwk5w5~c%j diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/.part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630.crc new file mode 100644 index 
0000000000000000000000000000000000000000..2a7dde15ed6ca272aa3baaef8081bf56e1baa88e GIT binary patch literal 12 TcmYc;N@ieSU}E?);n-II6`=%z literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-38581d1a-27f8-452f-9678-75225dfc64ab b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-38581d1a-27f8-452f-9678-75225dfc64ab deleted file mode 100644 index 963999a2ef1ae25a21bc7f7eed2044621d76e702..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 118 zcmYdfU|`4qVvVi(e-$#8GBI3XW@JguC^B5+&&2HL?9AxM#Kg?Vz`)3GJdlaWk%5Vk zfr){UvGL;tZ6-!{hPLe&fpiF?J4hHP&cMN-5aZ9ZYiAhi=VG?=7cAGWJDkJB3$y?P Kz?J}wVgLZ!ff@J! diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630 b/hail_search/fixtures/GRCh38/SNV_INDEL/lookup.ht/rows/parts/part-0-7a236e4f-7c20-4944-b7d8-071d2b10a630 new file mode 100644 index 0000000000000000000000000000000000000000..1d7128c9f7ba35bd412330fdb4772c2bb6278c44 GIT binary patch literal 123 zcmYdeU|^^MVvVi(e-*0EGBIpnW@KPV&L}cm~Px$iT$N zz{J4F*!b~+HWQ;eBV*h4i$F#Qh{?#n&EPD-aENhLKzVauvTJO^BxjAw_OqE(nNFON P;04+O0$`hf#xVc@Zp#}b literal 0 HcmV?d00001 diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 66b6334c95..bef76459b4 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -287,7 +287,7 @@ def _parse_sample_data(self, sample_data): project_samples = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) for s in sample_data: families.add(s['family_guid']) - project_samples[s['project_guid']][s['family_guid']][s['sample_type']].append(s) + project_samples[s['project_guid']][s['sample_type']][s['family_guid']].append(s) num_families = len(families) logger.info(f'Loading {self.DATA_TYPE} data for {num_families} families in {len(project_samples)} projects') @@ -297,13 +297,12 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ if 
len(project_samples) == 1: project_guid = list(project_samples.keys())[0] # for variant lookup, project_samples looks like - # {: {: {: True}, {: {: True}}, : ...} + # {: {: {: True}, : {: True}}, : ...} # for variant search, project_samples looks like - # {: {: {: [, , ...], : ...}, : ...}, : ...} - first_family_samples = list(project_samples[project_guid].values())[0] - sample_type = list(first_family_samples.keys())[0] + # {: {: {: [, , ...]}, : {: []} ...}, : ...} + sample_type = list(project_samples[project_guid].keys())[0] project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) - return self._filter_entries_table(project_ht, project_samples[project_guid], **kwargs) + return self._filter_entries_table(project_ht, project_samples[project_guid][sample_type], **kwargs) # Need to chunk tables or else evaluating table globals throws LineTooLong exception # However, minimizing number of chunks minimizes number of aggregations/ evals and improves performance @@ -314,14 +313,13 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): - first_family_samples = list(project_sample_data.values())[0] - sample_type = list(first_family_samples.keys())[0] + sample_type = list(project_sample_data.keys())[0] project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) - sample_data.update(project_sample_data) + sample_data.update(project_sample_data[sample_type]) if len(project_hts) >= chunk_size: self._filter_merged_project_hts( @@ -342,14 +340,14 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ def import_filtered_table(self, project_samples, num_families, **kwargs): if num_families == 1: family_sample_data = 
list(project_samples.values())[0] - family_guid = list(family_sample_data.keys())[0] - sample_type = list(family_sample_data[family_guid].keys())[0] + sample_type = list(family_sample_data.keys())[0] + family_guid = list(family_sample_data[sample_type].keys())[0] family_ht = self._read_table(f'families/{sample_type}/{family_guid}.ht', use_ssd_dir=True) family_ht = family_ht.transmute(family_entries=[family_ht.entries]) family_ht = family_ht.annotate_globals( family_guids=[family_guid], family_samples={family_guid: family_ht.sample_ids}, ) - families_ht, comp_het_families_ht = self._filter_entries_table(family_ht, family_sample_data, **kwargs) + families_ht, comp_het_families_ht = self._filter_entries_table(family_ht, family_sample_data[sample_type], **kwargs) else: families_ht, comp_het_families_ht = self._load_filtered_project_hts(project_samples, **kwargs) @@ -399,14 +397,6 @@ def _merge_project_hts(project_hts, n_partitions, include_all_globals=False): def _filter_entries_table(self, ht, sample_data, inheritance_filter=None, quality_filter=None, **kwargs): ht = self._prefilter_entries_table(ht, **kwargs) - # Temporarily reset sample_data until full blended eS/GS support is added - for family_guid, samples_by_sample_type in sample_data.items(): - if isinstance(list(samples_by_sample_type.values())[0], list): - samples = [s for samples in samples_by_sample_type.values() for s in samples] - sample_data[family_guid] = samples - else: - sample_data[family_guid] = True - ht, sorted_family_sample_data = self._add_entry_sample_families(ht, sample_data) passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht, **kwargs) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index cad6a0b3d7..90436bea27 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -1,4 +1,4 @@ -import os +from collections import defaultdict from aiohttp.web import HTTPNotFound import hail as hl @@ -310,34 +310,29 @@ def 
_gene_rank_sort(cls, r, gene_ranks): def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): # Get all the project-families for the looked up variant formatted as a dict of dicts: - # {: {: {: True}, {: {: True}}, : ...} + # {: {: {: True}, : {: True}}, : ...} lookup_ht = self._read_table('lookup.ht', use_ssd_dir=True, skip_missing_field='project_stats') if lookup_ht is None: raise HTTPNotFound() variant_projects = lookup_ht.aggregate(hl.agg.take( hl.dict(hl.enumerate(lookup_ht.project_stats).starmap(lambda i, ps: ( - lookup_ht.project_guids[i], + lookup_ht.project_sample_types[i], hl.enumerate(ps).starmap( lambda j, s: hl.or_missing(self._stat_has_non_ref(s), j) ).filter(hl.is_defined), )).filter( lambda x: x[1].any(hl.is_defined) - ).starmap(lambda project_guid, family_indices: ( - project_guid, - hl.dict(family_indices.map(lambda j: (lookup_ht.project_families[project_guid][j], True))), - ))), 1), + ).starmap(lambda project_key, family_indices: ( + project_key, + hl.dict(family_indices.map(lambda j: (lookup_ht.project_families[project_key][j], True))), + )).group_by( + lambda x: x[0][0] + ).map_values( + lambda project_data: hl.dict(project_data.starmap( + lambda project_key, families: (project_key[1], families) + )))), 1) )[0] - for project_guid, families in variant_projects.items(): - # Temporarily use try/except to determine sample_type, to be removed when lookup table contains sample_type - try: - hl.read_table(self._get_table_path(f'projects/WES/{project_guid}.ht', use_ssd_dir=True)) - sample_type = 'WES' - except Exception: - sample_type = 'WGS' - for family_guid, value in families.items(): - families[family_guid] = {sample_type: value} - # Variant can be present in the lookup table with only ref calls, so is still not present in any projects if not variant_projects: raise HTTPNotFound() From af8e501f1ac4f60d6868b39dc3098d93639de0f2 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 23 Aug 2024 10:52:54 -0400 Subject: [PATCH 662/736] 
pass through missing props --- ui/shared/components/panel/search/VariantSearchResults.jsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/search/VariantSearchResults.jsx b/ui/shared/components/panel/search/VariantSearchResults.jsx index 93ae1e81b8..20eaac969b 100644 --- a/ui/shared/components/panel/search/VariantSearchResults.jsx +++ b/ui/shared/components/panel/search/VariantSearchResults.jsx @@ -46,7 +46,7 @@ DisplayVariants.propTypes = { const BaseVariantSearchResultsContent = React.memo(({ match, variantSearchDisplay, searchedVariantExportConfig, totalVariantsCount, additionalDisplayEdit, - displayVariants, compoundHetToggle, + displayVariants, compoundHetToggle, ...props }) => { const { searchHash } = match.params const { page = 1, recordsPerPage } = variantSearchDisplay @@ -61,7 +61,7 @@ const BaseVariantSearchResultsContent = React.memo(({
{additionalDisplayEdit} - + {searchedVariantExportConfig && 1000} />} @@ -71,7 +71,7 @@ const BaseVariantSearchResultsContent = React.memo(({ , - + From 540cab3dc03110daad571ff13de8b3d04ed88911 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 23 Aug 2024 11:30:47 -0400 Subject: [PATCH 663/736] do not change dropdown value on blur --- ui/shared/components/form/Inputs.jsx | 1 + 1 file changed, 1 insertion(+) diff --git a/ui/shared/components/form/Inputs.jsx b/ui/shared/components/form/Inputs.jsx index 4c27b529da..4dc492ca38 100644 --- a/ui/shared/components/form/Inputs.jsx +++ b/ui/shared/components/form/Inputs.jsx @@ -138,6 +138,7 @@ export const Dropdown = React.memo(({ options, includeCategories, ...props }) => inputType="Dropdown" options={processOptions(options, includeCategories)} noResultsMessage={null} + selectOnBlur={false} tabIndex="0" /> )) From eb943cbdf66d57fc119574abcc8c273ab8f1def1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 11:13:48 -0400 Subject: [PATCH 664/736] validating vcf works with local files --- seqr/utils/file_utils.py | 7 ++++ seqr/utils/vcf_utils.py | 4 +- seqr/views/apis/data_manager_api_tests.py | 45 ++++++++++++++++++++++- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/seqr/utils/file_utils.py b/seqr/utils/file_utils.py index ee5b0835bd..76e1258da0 100644 --- a/seqr/utils/file_utils.py +++ b/seqr/utils/file_utils.py @@ -1,3 +1,4 @@ +import glob import gzip import os import subprocess # nosec @@ -47,6 +48,12 @@ def does_file_exist(file_path, user=None): return os.path.isfile(file_path) +def list_files(wildcard_path, user): + if is_google_bucket_file_path(wildcard_path): + return get_gs_file_list(wildcard_path, user, check_subfolders=False, allow_missing=True) + return [file_path for file_path in glob.glob(wildcard_path) if os.path.isfile(file_path)] + + def file_iter(file_path, byte_range=None, raw_content=False, user=None, **kwargs): if is_google_bucket_file_path(file_path): for line in 
_google_bucket_file_iter(file_path, byte_range=byte_range, raw_content=raw_content, user=user, **kwargs): diff --git a/seqr/utils/vcf_utils.py b/seqr/utils/vcf_utils.py index 92f9bdd750..7a421db930 100644 --- a/seqr/utils/vcf_utils.py +++ b/seqr/utils/vcf_utils.py @@ -3,7 +3,7 @@ from collections import defaultdict from seqr.utils.middleware import ErrorsWarningsException -from seqr.utils.file_utils import file_iter, does_file_exist, get_gs_file_list +from seqr.utils.file_utils import file_iter, does_file_exist, list_files from seqr.utils.search.constants import VCF_FILE_EXTENSIONS BLOCK_SIZE = 65536 @@ -97,7 +97,7 @@ def validate_vcf_exists(data_path, user, path_name=None, allowed_exts=None): file_to_check = None if '*' in data_path: - files = get_gs_file_list(data_path, user, check_subfolders=False, allow_missing=True) + files = list_files(data_path, user) if files: file_to_check = files[0] elif does_file_exist(data_path, user=user): diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 68fbcf59d2..0372d9a155 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1431,11 +1431,15 @@ def _test_write_success(self, success_path, url, mock_subprocess, mock_open, pro ] mock_subprocess.assert_has_calls(expected_calls) + @mock.patch('seqr.utils.file_utils.os.path.isfile') + @mock.patch('seqr.utils.file_utils.glob.glob') @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_validate_callset(self, mock_subprocess): + def test_validate_callset(self, mock_subprocess, mock_glob, mock_os_isfile): url = reverse(validate_callset) self.check_pm_login(url) + mock_os_isfile.return_value = False + mock_glob.return_value = [] mock_subprocess.return_value.wait.return_value = -1 mock_subprocess.return_value.stdout = [b'File not found'] body = {'filePath': 'gs://test_bucket/mito_callset.mt', 'datasetType': 'SV'} @@ -1455,6 +1459,45 @@ def test_validate_callset(self, 
mock_subprocess): self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), {'success': True}) + body['filePath'] = body['filePath'].replace('gs://test_bucket', '/local_dir') + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 400) + self.assertListEqual(response.json()['errors'], ['Data file or path /local_dir/mito_callset.mt is not found.']) + + mock_os_isfile.return_value = True + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'success': True}) + + mock_subprocess.return_value.communicate.return_value = ( + b'', b'CommandException: One or more URLs matched no objects.', + ) + body = {'filePath': 'gs://test_bucket/sharded_vcf/part0*.vcf', 'datasetType': 'SNV_INDEL'} + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 400) + self.assertListEqual( + response.json()['errors'], ['Data file or path gs://test_bucket/sharded_vcf/part0*.vcf is not found.'], + ) + + mock_subprocess.return_value.communicate.return_value = ( + b'gs://test_bucket/sharded_vcf/part001.vcf\ngs://test_bucket/sharded_vcf/part002.vcf\n', b'', + ) + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'success': True}) + + body['filePath'] = body['filePath'].replace('gs://test_bucket', '/local_dir') + response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 400) + self.assertListEqual( + response.json()['errors'], ['Data file or path /local_dir/sharded_vcf/part0*.vcf is not found.'], + ) + + mock_glob.return_value = ['/local_dir/sharded_vcf/part001.vcf', '/local_dir/sharded_vcf/part002.vcf'] + 
response = self.client.post(url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'success': True}) + # test data manager access self.login_data_manager_user() response = self.client.post(url, content_type='application/json', data=json.dumps(body)) From 00ed22518eec3f79a195fe6743a2e77481bf7eb7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 11:40:00 -0400 Subject: [PATCH 665/736] handle conditional check for airtable --- seqr/views/apis/anvil_workspace_api_tests.py | 10 +++------- seqr/views/apis/data_manager_api.py | 7 +++++-- seqr/views/utils/airtable_utils.py | 7 +++++++ seqr/views/utils/test_utils.py | 7 +++++++ 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index e45e29f093..fb8a2639b6 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -8,7 +8,7 @@ from seqr.models import Project, Family, Individual from seqr.views.apis.anvil_workspace_api import anvil_workspace_page, create_project_from_workspace, \ validate_anvil_vcf, grant_workspace_access, add_workspace_data, get_anvil_vcf_list, get_anvil_igv_options -from seqr.views.utils.test_utils import AnvilAuthenticationTestCase, AuthenticationTestCase, AirflowTestCase, \ +from seqr.views.utils.test_utils import AnvilAuthenticationTestCase, AuthenticationTestCase, AirflowTestCase, AirtableTest, \ TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME, TEST_WORKSPACE_NAME1, TEST_NO_PROJECT_WORKSPACE_NAME, TEST_NO_PROJECT_WORKSPACE_NAME2 from seqr.views.utils.terra_api_utils import remove_token, TerraAPIException, TerraRefreshTokenFailedException from settings import SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL @@ -67,7 +67,6 @@ TEMP_PATH = '/temp_path/temp_filename' MOCK_AIRTABLE_URL = 'http://testairtable' 
-MOCK_AIRTABLE_KEY = 'mock_key' # nosec PROJECT1_SAMPLES = ['HG00735', 'NA19678', 'NA20870', 'HG00732', 'NA19675_1', 'NA20874', 'HG00733', 'HG00731'] PROJECT2_SAMPLES = ['NA20885', 'NA19675_1', 'NA19678', 'HG00735'] @@ -484,7 +483,7 @@ def _test_get_workspace_files(self, url, response_key, expected_files, mock_subp ]) -class LoadAnvilDataAPITest(AirflowTestCase): +class LoadAnvilDataAPITest(AirflowTestCase, AirtableTest): fixtures = ['users', 'social_auth', 'reference_data', '1kg_project'] LOADING_PROJECT_GUID = f'P_{TEST_NO_PROJECT_WORKSPACE_NAME}' @@ -509,9 +508,6 @@ def _get_dag_variable_overrides(additional_tasks_check): def setUp(self): # Set up api responses responses.add(responses.POST, f'{MOCK_AIRTABLE_URL}/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking', status=400) - patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', MOCK_AIRTABLE_KEY) - patcher.start() - self.addCleanup(patcher.stop) patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_URL', MOCK_AIRTABLE_URL) patcher.start() self.addCleanup(patcher.stop) @@ -777,7 +773,7 @@ def _assert_valid_operation(self, project, test_add_data=True): 'Number of Samples': 8 if test_add_data else 3, 'Status': 'Loading', }}]}) - self.assertEqual(responses.calls[-1].request.headers['Authorization'], 'Bearer {}'.format(MOCK_AIRTABLE_KEY)) + self.assert_expected_airtable_headers(-1) dag_json = { 'projects_to_run': [project.guid], diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 1ad54f5d99..2b85754116 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -471,12 +471,15 @@ def get_loaded_projects(request, sample_type, dataset_type): projects = get_internal_projects().filter(is_demo=False) project_samples = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: - project_samples = _fetch_airtable_loadable_project_samples(request.user) - projects = 
projects.filter(guid__in=project_samples.keys()) + import pdb; pdb.set_trace() + if AirtableSession.is_airtable_enabled(): + project_samples = _fetch_airtable_loadable_project_samples(request.user) + projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS # Include projects with either the matched sample type OR with no loaded data projects = projects.exclude(family__individual__sample__sample_type=exclude_sample_type) else: + # All other data types can only be loaded to projects which already have loaded data projects = projects.filter(family__individual__sample__sample_type=sample_type) projects = projects.distinct().order_by('name').values('name', projectGuid=F('guid'), dataTypeLastLoaded=Max( diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index eb1a4f8d1b..3330006ebd 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -24,7 +24,14 @@ class AirtableSession(object): ANVIL_BASE: 'appUelDNM3BnWaR7M', } + @staticmethod + def is_airtable_enabled(): + return bool(AIRTABLE_API_KEY) + def __init__(self, user, base=RDG_BASE, no_auth=False): + if not self.is_airtable_enabled(): + raise ValueError('Airtable is not configured') + self._user = user if not no_auth: self._check_user_access(base) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 6e79acc07f..9081803c76 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -547,6 +547,8 @@ def setUp(self): self.mock_get_group_members = patcher.start() self.mock_get_group_members.side_effect = get_group_members_side_effect self.addCleanup(patcher.stop) + patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', MOCK_AIRTABLE_KEY) + patcher.start() super(AnvilAuthenticationTestCase, self).setUp() @classmethod @@ -565,6 +567,7 @@ def 
assert_no_extra_anvil_calls(self): MOCK_AIRFLOW_URL = 'http://testairflowserver' +MOCK_AIRTABLE_KEY = 'airflow_access' DAG_NAME = 'LOADING_PIPELINE' PROJECT_GUID = 'R0001_1kg' @@ -720,6 +723,10 @@ def assert_expected_airtable_call(self, call_index, filter_formula, fields, addi expected_params.update(additional_params) self.assertDictEqual(responses.calls[call_index].request.params, expected_params) self.assertListEqual(self._get_list_param(responses.calls[call_index].request, 'fields%5B%5D'), fields) + self.assert_expected_airtable_headers(call_index) + + def assert_expected_airtable_headers(self, call_index): + self.assertEqual(responses.calls[call_index].request.headers['Authorization'], f'Bearer {MOCK_AIRTABLE_KEY}') @staticmethod def _get_list_param(call, param): From 2e8d919f00d9b660bd1aac1fe89c98fd403b39e9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 11:48:06 -0400 Subject: [PATCH 666/736] clean up test mocking --- seqr/views/apis/data_manager_api.py | 6 ++---- seqr/views/apis/report_api_tests.py | 2 ++ seqr/views/apis/summary_data_api_tests.py | 3 +-- seqr/views/utils/test_utils.py | 1 + 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 2b85754116..a7f4ca7092 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -471,10 +471,8 @@ def get_loaded_projects(request, sample_type, dataset_type): projects = get_internal_projects().filter(is_demo=False) project_samples = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: - import pdb; pdb.set_trace() - if AirtableSession.is_airtable_enabled(): - project_samples = _fetch_airtable_loadable_project_samples(request.user) - projects = projects.filter(guid__in=project_samples.keys()) + project_samples = _fetch_airtable_loadable_project_samples(request.user) + projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if 
sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS # Include projects with either the matched sample type OR with no loaded data projects = projects.exclude(family__individual__sample__sample_type=exclude_sample_type) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 9fa87646cd..ace09f91f4 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1443,6 +1443,8 @@ def test_variant_metadata(self): class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): + # TODO tests failing for non-local reports + fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] ADDITIONAL_FAMILIES = ['F000014_14'] ADDITIONAL_FINDINGS = ['NA21234_1_248367227'] diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 200c25eab6..057d598a8f 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -581,7 +581,6 @@ def _has_expected_metadata_response(self, response, expected_individuals, has_ai self.assertEqual(len([r['participant_id'] for r in response_json['rows'] if r['participant_id'] == 'NA20888']), 2) @mock.patch('seqr.views.utils.airtable_utils.MAX_OR_FILTERS', 2) - @mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', 'mock_key') @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated') @responses.activate def test_sample_metadata_export(self, mock_google_authenticated): @@ -655,6 +654,7 @@ def test_sample_metadata_export(self, mock_google_authenticated): # Test invalid airtable responses response = self.client.get(include_airtable_url) + # TODO failing for local self.assertEqual(response.status_code, 403) self.assertEqual(response.json()['error'], 'Permission Denied') mock_google_authenticated.return_value = True @@ -715,7 +715,6 @@ def test_sample_metadata_export(self, mock_google_authenticated): self.assertEqual(len(responses.calls), 8) 
self.assert_expected_airtable_call( -1, "OR(RECORD_ID()='reca4hcBnbA2cnZf9')", ['CollaboratorID']) - self.assertSetEqual({call.request.headers['Authorization'] for call in responses.calls}, {'Bearer mock_key'}) # Test gregor projects response = self.client.get(gregor_projects_url) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 9081803c76..bba5a69490 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -549,6 +549,7 @@ def setUp(self): self.addCleanup(patcher.stop) patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', MOCK_AIRTABLE_KEY) patcher.start() + self.addCleanup(patcher.stop) super(AnvilAuthenticationTestCase, self).setUp() @classmethod From 16200c610908f4397ac7ddf5cfe74470fda30f59 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 11:40:00 -0400 Subject: [PATCH 667/736] handle conditional check for airtable --- seqr/views/apis/anvil_workspace_api_tests.py | 10 +++------- seqr/views/apis/data_manager_api.py | 7 +++++-- seqr/views/utils/airtable_utils.py | 7 +++++++ seqr/views/utils/test_utils.py | 7 +++++++ 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index e45e29f093..fb8a2639b6 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -8,7 +8,7 @@ from seqr.models import Project, Family, Individual from seqr.views.apis.anvil_workspace_api import anvil_workspace_page, create_project_from_workspace, \ validate_anvil_vcf, grant_workspace_access, add_workspace_data, get_anvil_vcf_list, get_anvil_igv_options -from seqr.views.utils.test_utils import AnvilAuthenticationTestCase, AuthenticationTestCase, AirflowTestCase, \ +from seqr.views.utils.test_utils import AnvilAuthenticationTestCase, AuthenticationTestCase, AirflowTestCase, AirtableTest, \ TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME, 
TEST_WORKSPACE_NAME1, TEST_NO_PROJECT_WORKSPACE_NAME, TEST_NO_PROJECT_WORKSPACE_NAME2 from seqr.views.utils.terra_api_utils import remove_token, TerraAPIException, TerraRefreshTokenFailedException from settings import SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL @@ -67,7 +67,6 @@ TEMP_PATH = '/temp_path/temp_filename' MOCK_AIRTABLE_URL = 'http://testairtable' -MOCK_AIRTABLE_KEY = 'mock_key' # nosec PROJECT1_SAMPLES = ['HG00735', 'NA19678', 'NA20870', 'HG00732', 'NA19675_1', 'NA20874', 'HG00733', 'HG00731'] PROJECT2_SAMPLES = ['NA20885', 'NA19675_1', 'NA19678', 'HG00735'] @@ -484,7 +483,7 @@ def _test_get_workspace_files(self, url, response_key, expected_files, mock_subp ]) -class LoadAnvilDataAPITest(AirflowTestCase): +class LoadAnvilDataAPITest(AirflowTestCase, AirtableTest): fixtures = ['users', 'social_auth', 'reference_data', '1kg_project'] LOADING_PROJECT_GUID = f'P_{TEST_NO_PROJECT_WORKSPACE_NAME}' @@ -509,9 +508,6 @@ def _get_dag_variable_overrides(additional_tasks_check): def setUp(self): # Set up api responses responses.add(responses.POST, f'{MOCK_AIRTABLE_URL}/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking', status=400) - patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', MOCK_AIRTABLE_KEY) - patcher.start() - self.addCleanup(patcher.stop) patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_URL', MOCK_AIRTABLE_URL) patcher.start() self.addCleanup(patcher.stop) @@ -777,7 +773,7 @@ def _assert_valid_operation(self, project, test_add_data=True): 'Number of Samples': 8 if test_add_data else 3, 'Status': 'Loading', }}]}) - self.assertEqual(responses.calls[-1].request.headers['Authorization'], 'Bearer {}'.format(MOCK_AIRTABLE_KEY)) + self.assert_expected_airtable_headers(-1) dag_json = { 'projects_to_run': [project.guid], diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 1ad54f5d99..2b85754116 100644 --- 
a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -471,12 +471,15 @@ def get_loaded_projects(request, sample_type, dataset_type): projects = get_internal_projects().filter(is_demo=False) project_samples = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: - project_samples = _fetch_airtable_loadable_project_samples(request.user) - projects = projects.filter(guid__in=project_samples.keys()) + import pdb; pdb.set_trace() + if AirtableSession.is_airtable_enabled(): + project_samples = _fetch_airtable_loadable_project_samples(request.user) + projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS # Include projects with either the matched sample type OR with no loaded data projects = projects.exclude(family__individual__sample__sample_type=exclude_sample_type) else: + # All other data types can only be loaded to projects which already have loaded data projects = projects.filter(family__individual__sample__sample_type=sample_type) projects = projects.distinct().order_by('name').values('name', projectGuid=F('guid'), dataTypeLastLoaded=Max( diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index eb1a4f8d1b..3330006ebd 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -24,7 +24,14 @@ class AirtableSession(object): ANVIL_BASE: 'appUelDNM3BnWaR7M', } + @staticmethod + def is_airtable_enabled(): + return bool(AIRTABLE_API_KEY) + def __init__(self, user, base=RDG_BASE, no_auth=False): + if not self.is_airtable_enabled(): + raise ValueError('Airtable is not configured') + self._user = user if not no_auth: self._check_user_access(base) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 6e79acc07f..9081803c76 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -547,6 +547,8 @@ def 
setUp(self): self.mock_get_group_members = patcher.start() self.mock_get_group_members.side_effect = get_group_members_side_effect self.addCleanup(patcher.stop) + patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', MOCK_AIRTABLE_KEY) + patcher.start() super(AnvilAuthenticationTestCase, self).setUp() @classmethod @@ -565,6 +567,7 @@ def assert_no_extra_anvil_calls(self): MOCK_AIRFLOW_URL = 'http://testairflowserver' +MOCK_AIRTABLE_KEY = 'airflow_access' DAG_NAME = 'LOADING_PIPELINE' PROJECT_GUID = 'R0001_1kg' @@ -720,6 +723,10 @@ def assert_expected_airtable_call(self, call_index, filter_formula, fields, addi expected_params.update(additional_params) self.assertDictEqual(responses.calls[call_index].request.params, expected_params) self.assertListEqual(self._get_list_param(responses.calls[call_index].request, 'fields%5B%5D'), fields) + self.assert_expected_airtable_headers(call_index) + + def assert_expected_airtable_headers(self, call_index): + self.assertEqual(responses.calls[call_index].request.headers['Authorization'], f'Bearer {MOCK_AIRTABLE_KEY}') @staticmethod def _get_list_param(call, param): From a7d7b9862a37f8dbd02852968754592663619d90 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 11:48:06 -0400 Subject: [PATCH 668/736] clean up test mocking --- seqr/views/apis/data_manager_api.py | 6 ++---- seqr/views/apis/report_api_tests.py | 2 ++ seqr/views/apis/summary_data_api_tests.py | 3 +-- seqr/views/utils/test_utils.py | 1 + 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 2b85754116..a7f4ca7092 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -471,10 +471,8 @@ def get_loaded_projects(request, sample_type, dataset_type): projects = get_internal_projects().filter(is_demo=False) project_samples = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: - import pdb; pdb.set_trace() - if 
AirtableSession.is_airtable_enabled(): - project_samples = _fetch_airtable_loadable_project_samples(request.user) - projects = projects.filter(guid__in=project_samples.keys()) + project_samples = _fetch_airtable_loadable_project_samples(request.user) + projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS # Include projects with either the matched sample type OR with no loaded data projects = projects.exclude(family__individual__sample__sample_type=exclude_sample_type) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 9fa87646cd..ace09f91f4 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1443,6 +1443,8 @@ def test_variant_metadata(self): class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): + # TODO tests failing for non-local reports + fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] ADDITIONAL_FAMILIES = ['F000014_14'] ADDITIONAL_FINDINGS = ['NA21234_1_248367227'] diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 200c25eab6..057d598a8f 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -581,7 +581,6 @@ def _has_expected_metadata_response(self, response, expected_individuals, has_ai self.assertEqual(len([r['participant_id'] for r in response_json['rows'] if r['participant_id'] == 'NA20888']), 2) @mock.patch('seqr.views.utils.airtable_utils.MAX_OR_FILTERS', 2) - @mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', 'mock_key') @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated') @responses.activate def test_sample_metadata_export(self, mock_google_authenticated): @@ -655,6 +654,7 @@ def test_sample_metadata_export(self, mock_google_authenticated): # Test invalid airtable responses response 
= self.client.get(include_airtable_url) + # TODO failing for local self.assertEqual(response.status_code, 403) self.assertEqual(response.json()['error'], 'Permission Denied') mock_google_authenticated.return_value = True @@ -715,7 +715,6 @@ def test_sample_metadata_export(self, mock_google_authenticated): self.assertEqual(len(responses.calls), 8) self.assert_expected_airtable_call( -1, "OR(RECORD_ID()='reca4hcBnbA2cnZf9')", ['CollaboratorID']) - self.assertSetEqual({call.request.headers['Authorization'] for call in responses.calls}, {'Bearer mock_key'}) # Test gregor projects response = self.client.get(gregor_projects_url) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 9081803c76..bba5a69490 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -549,6 +549,7 @@ def setUp(self): self.addCleanup(patcher.stop) patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', MOCK_AIRTABLE_KEY) patcher.start() + self.addCleanup(patcher.stop) super(AnvilAuthenticationTestCase, self).setUp() @classmethod From ec5a9e4bfd89ac75162b013b83952c42e53e51ae Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 11:51:00 -0400 Subject: [PATCH 669/736] clean up --- seqr/views/apis/data_manager_api.py | 1 - seqr/views/apis/data_manager_api_tests.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index a7f4ca7092..1ad54f5d99 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -477,7 +477,6 @@ def get_loaded_projects(request, sample_type, dataset_type): # Include projects with either the matched sample type OR with no loaded data projects = projects.exclude(family__individual__sample__sample_type=exclude_sample_type) else: - # All other data types can only be loaded to projects which already have loaded data projects = 
projects.filter(family__individual__sample__sample_type=sample_type) projects = projects.distinct().order_by('name').values('name', projectGuid=F('guid'), dataTypeLastLoaded=Max( diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 68fbcf59d2..9d68289aba 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1481,6 +1481,7 @@ def test_get_loaded_projects(self): self.assertEqual(response.status_code, 200) # test with airtable filter + # TODO failing responses.add( responses.GET, 'https://api.airtable.com/v0/app3Y97xtbbaOopVR/PDO', json=AIRTABLE_PDO_RECORDS, status=200, ) From c078a4efc5aa9a8019343b263170aeb749a85324 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 11:52:49 -0400 Subject: [PATCH 670/736] add check for data manager api --- seqr/views/apis/data_manager_api.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 1ad54f5d99..cfb0410902 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -471,12 +471,14 @@ def get_loaded_projects(request, sample_type, dataset_type): projects = get_internal_projects().filter(is_demo=False) project_samples = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: - project_samples = _fetch_airtable_loadable_project_samples(request.user) - projects = projects.filter(guid__in=project_samples.keys()) + if AirtableSession.is_airtable_enabled(): + project_samples = _fetch_airtable_loadable_project_samples(request.user) + projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS # Include projects with either the matched sample type OR with no loaded data projects = projects.exclude(family__individual__sample__sample_type=exclude_sample_type) else: + # All 
other data types can only be loaded to projects which already have loaded data projects = projects.filter(family__individual__sample__sample_type=sample_type) projects = projects.distinct().order_by('name').values('name', projectGuid=F('guid'), dataTypeLastLoaded=Max( From e707fa49808e8b6f9cb0ec5b8ce012b6ea411fcf Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 12:42:03 -0400 Subject: [PATCH 671/736] test load projects with and withot airtable --- seqr/views/apis/data_manager_api.py | 17 +++++++++----- seqr/views/apis/data_manager_api_tests.py | 27 ++++++++++++++--------- seqr/views/utils/airtable_utils.py | 6 +---- seqr/views/utils/test_utils.py | 11 ++++----- 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index cfb0410902..9b275d866a 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -471,8 +471,14 @@ def get_loaded_projects(request, sample_type, dataset_type): projects = get_internal_projects().filter(is_demo=False) project_samples = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: - if AirtableSession.is_airtable_enabled(): - project_samples = _fetch_airtable_loadable_project_samples(request.user) + try: + airtable_session = AirtableSession(request.user) + except ValueError: + # Airtable is not configured for the deployment + airtable_session = None + if airtable_session: + project_samples = _fetch_airtable_loadable_project_samples(airtable_session) + import pdb; pdb.set_trace() projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS # Include projects with either the matched sample type OR with no loaded data @@ -482,7 +488,8 @@ def get_loaded_projects(request, sample_type, dataset_type): projects = projects.filter(family__individual__sample__sample_type=sample_type) projects = 
projects.distinct().order_by('name').values('name', projectGuid=F('guid'), dataTypeLastLoaded=Max( - 'family__individual__sample__loaded_date', filter=Q(family__individual__sample__dataset_type=dataset_type), + 'family__individual__sample__loaded_date', + filter=Q(family__individual__sample__dataset_type=dataset_type) & Q(family__individual__sample__sample_type=sample_type), )) if project_samples: @@ -492,8 +499,8 @@ def get_loaded_projects(request, sample_type, dataset_type): return create_json_response({'projects': list(projects)}) -def _fetch_airtable_loadable_project_samples(user): - pdos = AirtableSession(user).fetch_records( +def _fetch_airtable_loadable_project_samples(session): + pdos = session.fetch_records( 'PDO', fields=['PassingCollaboratorSampleIDs', 'SeqrIDs', 'SeqrProjectURL'], or_filters={'PDOStatus': LOADABLE_PDO_STATUSES} ) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 9d68289aba..3cdde6a40b 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -405,8 +405,8 @@ 'dataTypeLastLoaded': None, 'name': 'Empty Project', 'projectGuid': 'R0002_empty', - 'sampleIds': ['HG00738', 'HG00739'], } +EMPTY_PROJECT_SAMPLES_OPTION = {**EMPTY_PROJECT_OPTION, 'sampleIds': ['HG00738', 'HG00739']} AIRTABLE_PDO_RECORDS = { 'records': [ @@ -1481,28 +1481,29 @@ def test_get_loaded_projects(self): self.assertEqual(response.status_code, 200) # test with airtable filter - # TODO failing responses.add( responses.GET, 'https://api.airtable.com/v0/app3Y97xtbbaOopVR/PDO', json=AIRTABLE_PDO_RECORDS, status=200, ) snv_indel_url = url.replace('SV', 'SNV_INDEL') response = self.client.get(snv_indel_url) self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'projects': [EMPTY_PROJECT_OPTION, PROJECT_SAMPLES_OPTION]}) - self.assert_expected_airtable_call( - call_index=0, filter_formula="OR(PDOStatus='Methods (Loading)',PDOStatus='On hold for 
phenotips, but ready to load')", - fields=['PassingCollaboratorSampleIDs', 'SeqrIDs', 'SeqrProjectURL'], - ) + self.assertDictEqual(response.json(), {'projects': self.WGS_PROJECT_OPTIONS}) # test projects with no data loaded are returned for any sample type response = self.client.get(snv_indel_url.replace('WGS', 'WES')) self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'projects': [EMPTY_PROJECT_OPTION]}) + self.assertDictEqual(response.json(), {'projects': self.WES_PROJECT_OPTIONS}) class LocalDataManagerAPITest(AuthenticationTestCase, DataManagerAPITest): fixtures = ['users', '1kg_project', 'reference_data'] + WGS_PROJECT_OPTIONS = [EMPTY_PROJECT_OPTION, PROJECT_OPTION] + WES_PROJECT_OPTIONS = [ + {'name': '1kg project nåme with uniçøde', 'projectGuid': 'R0001_1kg', 'dataTypeLastLoaded': '2017-02-05T06:25:55.397Z'}, + EMPTY_PROJECT_OPTION, + ] + def setUp(self): patcher = mock.patch('seqr.utils.file_utils.os.path.isfile') self.mock_does_file_exist = patcher.start() @@ -1530,6 +1531,8 @@ class AnvilDataManagerAPITest(AirflowTestCase, DataManagerAPITest): LOADING_PROJECT_GUID = 'R0004_non_analyst_project' PROJECTS = [PROJECT_GUID, LOADING_PROJECT_GUID] + WGS_PROJECT_OPTIONS = [EMPTY_PROJECT_SAMPLES_OPTION, PROJECT_SAMPLES_OPTION] + WES_PROJECT_OPTIONS = [EMPTY_PROJECT_SAMPLES_OPTION, PROJECT_SAMPLES_OPTION] def setUp(self): patcher = mock.patch('seqr.utils.file_utils.subprocess.Popen') @@ -1577,8 +1580,12 @@ def _assert_expected_delete_index_response(self, response): self.assertEqual(response.json()['error'], 'Deleting indices is disabled for the hail backend') def test_get_loaded_projects(self, *args, **kwargs): - # Test relies on the local-only project data, and has no real difference for local/ non-local behavior - pass + super().test_get_loaded_projects(*args, **kwargs) + self.assert_expected_airtable_call( + call_index=0, + filter_formula="OR(PDOStatus='Methods (Loading)',PDOStatus='On hold for phenotips, but ready to 
load')", + fields=['PassingCollaboratorSampleIDs', 'SeqrIDs', 'SeqrProjectURL'], + ) @staticmethod def _get_dag_variable_overrides(*args, **kwargs): diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index 3330006ebd..a5a080413c 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -24,12 +24,8 @@ class AirtableSession(object): ANVIL_BASE: 'appUelDNM3BnWaR7M', } - @staticmethod - def is_airtable_enabled(): - return bool(AIRTABLE_API_KEY) - def __init__(self, user, base=RDG_BASE, no_auth=False): - if not self.is_airtable_enabled(): + if not AIRTABLE_API_KEY: raise ValueError('Airtable is not configured') self._user = user diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index bba5a69490..52af07345f 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -30,6 +30,7 @@ class AuthenticationTestCase(TestCase): NO_POLICY_USER = 'no_policy' ES_HOSTNAME = 'testhost' + MOCK_AIRTABLE_KEY = '' super_user = None analyst_user = None @@ -45,6 +46,9 @@ def setUp(self): patcher = mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', self.ES_HOSTNAME) patcher.start() self.addCleanup(patcher.stop) + patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', self.MOCK_AIRTABLE_KEY) + patcher.start() + self.addCleanup(patcher.stop) patcher = mock.patch('seqr.views.utils.permissions_utils.SEQR_PRIVACY_VERSION', 2.1) patcher.start() self.addCleanup(patcher.stop) @@ -509,6 +513,7 @@ def get_group_members_side_effect(user, group, use_sa_credentials=False): class AnvilAuthenticationTestCase(AuthenticationTestCase): ES_HOSTNAME = '' + MOCK_AIRTABLE_KEY = 'airflow_access' # mock the terra apis def setUp(self): @@ -547,9 +552,6 @@ def setUp(self): self.mock_get_group_members = patcher.start() self.mock_get_group_members.side_effect = get_group_members_side_effect self.addCleanup(patcher.stop) - patcher = 
mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', MOCK_AIRTABLE_KEY) - patcher.start() - self.addCleanup(patcher.stop) super(AnvilAuthenticationTestCase, self).setUp() @classmethod @@ -568,7 +570,6 @@ def assert_no_extra_anvil_calls(self): MOCK_AIRFLOW_URL = 'http://testairflowserver' -MOCK_AIRTABLE_KEY = 'airflow_access' DAG_NAME = 'LOADING_PIPELINE' PROJECT_GUID = 'R0001_1kg' @@ -727,7 +728,7 @@ def assert_expected_airtable_call(self, call_index, filter_formula, fields, addi self.assert_expected_airtable_headers(call_index) def assert_expected_airtable_headers(self, call_index): - self.assertEqual(responses.calls[call_index].request.headers['Authorization'], f'Bearer {MOCK_AIRTABLE_KEY}') + self.assertEqual(responses.calls[call_index].request.headers['Authorization'], f'Bearer {self.MOCK_AIRTABLE_KEY}') @staticmethod def _get_list_param(call, param): From 125fb71dba534f84b1db06574f8545612c305135 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 12:55:54 -0400 Subject: [PATCH 672/736] properly test conditional get project --- seqr/views/apis/data_manager_api.py | 14 ++++---------- seqr/views/apis/data_manager_api_tests.py | 12 ++++++++---- seqr/views/utils/airtable_utils.py | 6 +++++- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 9b275d866a..b233b1ce96 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -471,14 +471,8 @@ def get_loaded_projects(request, sample_type, dataset_type): projects = get_internal_projects().filter(is_demo=False) project_samples = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: - try: - airtable_session = AirtableSession(request.user) - except ValueError: - # Airtable is not configured for the deployment - airtable_session = None - if airtable_session: - project_samples = _fetch_airtable_loadable_project_samples(airtable_session) - import pdb; pdb.set_trace() + if 
AirtableSession.is_airtable_enabled(): + project_samples = _fetch_airtable_loadable_project_samples(request.user) projects = projects.filter(guid__in=project_samples.keys()) exclude_sample_type = Sample.SAMPLE_TYPE_WES if sample_type == Sample.SAMPLE_TYPE_WGS else Sample.SAMPLE_TYPE_WGS # Include projects with either the matched sample type OR with no loaded data @@ -499,8 +493,8 @@ def get_loaded_projects(request, sample_type, dataset_type): return create_json_response({'projects': list(projects)}) -def _fetch_airtable_loadable_project_samples(session): - pdos = session.fetch_records( +def _fetch_airtable_loadable_project_samples(user): + pdos = AirtableSession(user).fetch_records( 'PDO', fields=['PassingCollaboratorSampleIDs', 'SeqrIDs', 'SeqrProjectURL'], or_filters={'PDOStatus': LOADABLE_PDO_STATUSES} ) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 3cdde6a40b..4fd7ceda04 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1460,8 +1460,8 @@ def test_validate_callset(self, mock_subprocess): response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 200) + @mock.patch('seqr.views.utils.permissions_utils.INTERNAL_NAMESPACES', ['my-seqr-billing', 'ext-data']) @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', 'https://seqr.broadinstitute.org/') - @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated', lambda x: True) @responses.activate def test_get_loaded_projects(self): url = reverse(get_loaded_projects, args=['WGS', 'SV']) @@ -1488,6 +1488,7 @@ def test_get_loaded_projects(self): response = self.client.get(snv_indel_url) self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), {'projects': self.WGS_PROJECT_OPTIONS}) + self._assert_expected_get_projects_requests() # test projects with no data loaded are returned for any sample type 
response = self.client.get(snv_indel_url.replace('WGS', 'WES')) @@ -1524,6 +1525,9 @@ def _add_file_iter(self, stdout): self.mock_does_file_exist.return_value = True self.mock_file_iter.return_value += stdout + def _assert_expected_get_projects_requests(self): + self.assertEqual(len(responses.calls), 0) + @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers') class AnvilDataManagerAPITest(AirflowTestCase, DataManagerAPITest): @@ -1532,7 +1536,7 @@ class AnvilDataManagerAPITest(AirflowTestCase, DataManagerAPITest): LOADING_PROJECT_GUID = 'R0004_non_analyst_project' PROJECTS = [PROJECT_GUID, LOADING_PROJECT_GUID] WGS_PROJECT_OPTIONS = [EMPTY_PROJECT_SAMPLES_OPTION, PROJECT_SAMPLES_OPTION] - WES_PROJECT_OPTIONS = [EMPTY_PROJECT_SAMPLES_OPTION, PROJECT_SAMPLES_OPTION] + WES_PROJECT_OPTIONS = [EMPTY_PROJECT_SAMPLES_OPTION] def setUp(self): patcher = mock.patch('seqr.utils.file_utils.subprocess.Popen') @@ -1579,8 +1583,8 @@ def _assert_expected_delete_index_response(self, response): self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'Deleting indices is disabled for the hail backend') - def test_get_loaded_projects(self, *args, **kwargs): - super().test_get_loaded_projects(*args, **kwargs) + def _assert_expected_get_projects_requests(self): + self.assertEqual(len(responses.calls), 1) self.assert_expected_airtable_call( call_index=0, filter_formula="OR(PDOStatus='Methods (Loading)',PDOStatus='On hold for phenotips, but ready to load')", diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index a5a080413c..3330006ebd 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -24,8 +24,12 @@ class AirtableSession(object): ANVIL_BASE: 'appUelDNM3BnWaR7M', } + @staticmethod + def is_airtable_enabled(): + return bool(AIRTABLE_API_KEY) + def __init__(self, user, base=RDG_BASE, no_auth=False): - if not AIRTABLE_API_KEY: + if not 
self.is_airtable_enabled(): raise ValueError('Airtable is not configured') self._user = user From c89b0c007598b2894d8caf263193ecd9078a78d0 Mon Sep 17 00:00:00 2001 From: hanars Date: Mon, 26 Aug 2024 16:07:04 -0400 Subject: [PATCH 673/736] clearer variable name Co-authored-by: Julia Klugherz --- seqr/management/commands/check_for_new_samples_from_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index e70ad1295b..829ad706b8 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -174,7 +174,7 @@ def _update_pdos(session, project_guid, sample_ids): session.safe_patch_records_by_id('PDO', pdo_ids, {'PDOStatus': AVAILABLE_PDO_STATUS}) skipped_pdo_samples = { - pdo_id: sample_ids for pdo_id, sample_ids in skipped_pdo_samples.items() if pdo_id in pdo_ids + pdo_id: sample_record_ids for pdo_id, sample_record_ids in skipped_pdo_samples.items() if pdo_id in pdo_ids } if not skipped_pdo_samples: return [] From 1313f260cd927eeb6a33d4281c2e96971f37ca99 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 16:56:39 -0400 Subject: [PATCH 674/736] restrice non-local report endpoints --- seqr/views/apis/report_api.py | 12 ++++-- seqr/views/apis/report_api_tests.py | 61 ++++++++++++++--------------- seqr/views/utils/test_utils.py | 10 ----- 3 files changed, 38 insertions(+), 45 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 8839c48f59..3e5329638a 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -17,8 +17,8 @@ EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, GENE_COLUMN, FAMILY_INDIVIDUAL_FIELDS from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs from seqr.views.utils.json_utils import 
create_json_response -from seqr.views.utils.permissions_utils import analyst_required, get_project_and_check_permissions, \ - get_project_guids_user_can_view, get_internal_projects, pm_or_analyst_required +from seqr.views.utils.permissions_utils import user_is_analyst, get_project_and_check_permissions, \ + get_project_guids_user_can_view, get_internal_projects, pm_or_analyst_required, active_user_has_policies_and_passes_test from seqr.views.utils.terra_api_utils import anvil_enabled from seqr.views.utils.variant_utils import DISCOVERY_CATEGORY @@ -31,6 +31,10 @@ MONDO_BASE_URL = 'https://monarchinitiative.org/v3/api/entity' +anvil_enabled_analyst_required = active_user_has_policies_and_passes_test( + lambda user: user_is_analyst(user) and anvil_enabled()) + + @pm_or_analyst_required def seqr_stats(request): non_demo_projects = Project.objects.filter(is_demo=False) @@ -111,7 +115,7 @@ def _get_sample_counts(sample_q, data_type_key='dataset_type'): ] -@analyst_required +@anvil_enabled_analyst_required def anvil_export(request, project_guid): project = get_project_and_check_permissions(project_guid, request.user) @@ -349,7 +353,7 @@ def _add_row(row, family_id, row_type): } -@analyst_required +@anvil_enabled_analyst_required def gregor_export(request): request_json = json.loads(request.body) missing_required_fields = [field for field in ['consentCode', 'deliveryPath'] if not request_json.get(field)] diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index ace09f91f4..cfd2ab06e7 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -677,10 +677,8 @@ def test_seqr_stats(self): self.check_no_analyst_no_access(url, has_override=self.HAS_PM_OVERRIDE) @mock.patch('seqr.views.utils.export_utils.zipfile.ZipFile') - @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated') @responses.activate - def test_anvil_export(self, mock_google_authenticated, mock_zip): - 
mock_google_authenticated.return_value = False + def test_anvil_export(self, mock_zip): url = reverse(anvil_export, args=[PROJECT_GUID]) self.check_analyst_login(url) @@ -689,13 +687,19 @@ def test_anvil_export(self, mock_google_authenticated, mock_zip): self.assertEqual(response.status_code, 403) self.assertEqual(response.json()['error'], 'Permission Denied') + responses.add(responses.GET, '{}/app3Y97xtbbaOopVR/Samples'.format(AIRTABLE_URL), json=AIRTABLE_SAMPLE_RECORDS, status=200) + response = self.client.get(url) + self._check_anvil_export_response(response, mock_zip, no_analyst_project_url) + + # Test non-broad analysts do not have access + self.login_pm_user() response = self.client.get(url) self.assertEqual(response.status_code, 403) self.assertEqual(response.json()['error'], 'Permission Denied') - mock_google_authenticated.return_value = True - responses.add(responses.GET, '{}/app3Y97xtbbaOopVR/Samples'.format(AIRTABLE_URL), json=AIRTABLE_SAMPLE_RECORDS, status=200) - response = self.client.get(url) + self.check_no_analyst_no_access(url) + + def _check_anvil_export_response(self, response, mock_zip, no_analyst_project_url): self.assertEqual(response.status_code, 200) self.assertEqual( response.get('content-disposition'), @@ -764,30 +768,27 @@ def test_anvil_export(self, mock_google_authenticated, mock_zip): 'p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T'], discovery_file) - added_perm = self.add_analyst_project(4) - if added_perm: - response = self.client.get(no_analyst_project_url) - self.assertEqual(response.status_code, 400) - self.assertEqual(response.json()['errors'], ['Discovery variant(s) 1-248367227-TC-T in family 14 have no associated gene']) - - self.check_no_analyst_no_access(url) - - # Test non-broad analysts do not have access - self.login_pm_user() - response = self.client.get(url) - self.assertEqual(response.status_code, 403) - self.assertEqual(response.json()['error'], 'Permission Denied') + self.login_data_manager_user() + 
self.mock_get_groups.side_effect = lambda user: ['Analysts'] + response = self.client.get(no_analyst_project_url) + self.assertEqual(response.status_code, 400) + self.assertEqual(response.json()['errors'], + ['Discovery variant(s) 1-248367227-TC-T in family 14 have no associated gene']) @mock.patch('seqr.views.apis.report_api.GREGOR_DATA_MODEL_URL', MOCK_DATA_MODEL_URL) - @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated') @mock.patch('seqr.views.apis.report_api.datetime') @mock.patch('seqr.views.utils.export_utils.open') @mock.patch('seqr.views.utils.export_utils.TemporaryDirectory') @mock.patch('seqr.utils.file_utils.subprocess.Popen') @responses.activate - def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_datetime, mock_google_authenticated): + def test_gregor_export(self, *args): + url = reverse(gregor_export) + self.check_analyst_login(url) + + self._test_gregor_export(url, *args) + + def _test_gregor_export(self, url, mock_subprocess, mock_temp_dir, mock_open, mock_datetime): mock_datetime.now.return_value.year = 2020 - mock_google_authenticated.return_value = False mock_temp_dir.return_value.__enter__.return_value = '/mock/tmp' mock_subprocess.return_value.wait.return_value = 1 @@ -799,9 +800,6 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat status=200) responses.add(responses.GET, MOCK_DATA_MODEL_URL, status=404) - url = reverse(gregor_export) - self.check_analyst_login(url) - response = self.client.post(url, content_type='application/json', data=json.dumps({})) self.assertEqual(response.status_code, 400) self.assertListEqual(response.json()['errors'], ['Missing required field(s): consentCode, deliveryPath']) @@ -818,11 +816,6 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat mock_subprocess.return_value.wait.return_value = 0 response = self.client.post(url, content_type='application/json', data=json.dumps(body)) - 
self.assertEqual(response.status_code, 403) - self.assertEqual(response.json()['error'], 'Permission Denied') - - mock_google_authenticated.return_value = True - response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 400) self.assertListEqual(response.json()['errors'], [ 'Unable to load data model: 404 Client Error: Not Found for url: http://raw.githubusercontent.com/gregor_data_model.json', @@ -1443,7 +1436,6 @@ def test_variant_metadata(self): class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): - # TODO tests failing for non-local reports fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] ADDITIONAL_FAMILIES = ['F000014_14'] @@ -1464,6 +1456,13 @@ class LocalReportAPITest(AuthenticationTestCase, ReportAPITest): }, } + def _check_anvil_export_response(self, response, *args): + self.assertEqual(response.status_code, 403) + + def _test_gregor_export(self, url, *args): + response = self.client.post(url, content_type='application/json', data=json.dumps({})) + self.assertEqual(response.status_code, 403) + class AnvilReportAPITest(AnvilAuthenticationTestCase, ReportAPITest): fixtures = ['users', 'social_auth', '1kg_project', 'reference_data', 'report_variants'] diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 52af07345f..ba39a01adf 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -99,12 +99,6 @@ def add_additional_user_groups(cls): pm_group = Group.objects.get(pk=5) pm_group.user_set.add(cls.pm_user) - @classmethod - def add_analyst_project(cls, project_id): - analyst_group = Group.objects.get(pk=4) - assign_perm(user_or_group=analyst_group, perm=CAN_VIEW, obj=Project.objects.filter(id=project_id)) - return True - def check_require_login(self, url, **request_kwargs): self._check_login(url, self.AUTHENTICATED_USER, **request_kwargs) @@ -559,10 +553,6 @@ def add_additional_user_groups(cls): 
analyst_group = Group.objects.get(pk=4) analyst_group.user_set.add(cls.analyst_user, cls.pm_user) - @classmethod - def add_analyst_project(cls, project_id): - return False - def assert_no_extra_anvil_calls(self): self.mock_get_ws_acl.assert_not_called() self.mock_get_groups.assert_not_called() From 61c5c5032f7eb82c471b4f5f0e2190e9f5dbd64e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 17:13:44 -0400 Subject: [PATCH 675/736] do not query airtable for metadata when disabled --- seqr/views/apis/summary_data_api.py | 2 +- seqr/views/apis/summary_data_api_tests.py | 34 ++++++++++++----------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 775562313d..96cb2b6547 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -274,7 +274,7 @@ def _search_new_saved_variants(family_variant_ids: list[FamilyVariantKey], user: def _get_metadata_projects(request, project_guid): is_analyst = user_is_analyst(request.user) is_all_projects = project_guid == ALL_PROJECTS - include_airtable = 'true' in request.GET.get('includeAirtable', '') and is_analyst and not is_all_projects + include_airtable = 'true' in request.GET.get('includeAirtable', '') and AirtableSession.is_airtable_enabled() and is_analyst and not is_all_projects if is_all_projects: projects = get_internal_projects() if is_analyst else Project.objects.filter( guid__in=get_project_guids_user_can_view(request.user)) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 057d598a8f..dde9e17a00 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -581,10 +581,8 @@ def _has_expected_metadata_response(self, response, expected_individuals, has_ai self.assertEqual(len([r['participant_id'] for r in response_json['rows'] if r['participant_id'] == 'NA20888']), 2) 
@mock.patch('seqr.views.utils.airtable_utils.MAX_OR_FILTERS', 2) - @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated') @responses.activate - def test_sample_metadata_export(self, mock_google_authenticated): - mock_google_authenticated.return_value = False + def test_sample_metadata_export(self): url = reverse(individual_metadata, args=['R0003_test']) self.check_require_login(url) @@ -653,12 +651,16 @@ def test_sample_metadata_export(self, mock_google_authenticated): self._has_expected_metadata_response(response, all_project_individuals, has_duplicate=True) # Test invalid airtable responses - response = self.client.get(include_airtable_url) - # TODO failing for local - self.assertEqual(response.status_code, 403) - self.assertEqual(response.json()['error'], 'Permission Denied') - mock_google_authenticated.return_value = True + self._test_metadata_airtable_responses(include_airtable_url, expected_individuals) + + # Test gregor projects + response = self.client.get(gregor_projects_url) + self._has_expected_metadata_response(response, multi_project_individuals, has_duplicate=True) + response = self.client.get(f'{gregor_projects_url}?includeAirtable=true') + self._has_expected_metadata_response(response, multi_project_individuals, has_airtable=self.HAS_AIRTABLE, has_duplicate=True) + + def _test_metadata_airtable_responses(self, include_airtable_url, expected_individuals): responses.add(responses.GET, '{}/app3Y97xtbbaOopVR/Samples'.format(AIRTABLE_URL), status=402) response = self.client.get(include_airtable_url) self.assertEqual(response.status_code, 402) @@ -683,7 +685,6 @@ def test_sample_metadata_export(self, mock_google_authenticated): }) ]) - responses.reset() responses.add(responses.GET, '{}/app3Y97xtbbaOopVR/Samples'.format(AIRTABLE_URL), json=PAGINATED_AIRTABLE_SAMPLE_RECORDS, status=200) @@ -716,13 +717,6 @@ def test_sample_metadata_export(self, mock_google_authenticated): self.assert_expected_airtable_call( -1, 
"OR(RECORD_ID()='reca4hcBnbA2cnZf9')", ['CollaboratorID']) - # Test gregor projects - response = self.client.get(gregor_projects_url) - self._has_expected_metadata_response(response, multi_project_individuals, has_duplicate=True) - - response = self.client.get(f'{gregor_projects_url}?includeAirtable=true') - self._has_expected_metadata_response(response, multi_project_individuals, has_airtable=True, has_duplicate=True) - @mock.patch('seqr.views.apis.summary_data_api.EmailMessage') def test_send_vlm_email(self, mock_email): url = reverse(send_vlm_email) @@ -773,6 +767,13 @@ class LocalSummaryDataAPITest(AuthenticationTestCase, SummaryDataAPITest): fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] NUM_MANAGER_SUBMISSIONS = 4 ADDITIONAL_SAMPLES = ['NA21234', 'NA21987'] + HAS_AIRTABLE = False + + def _test_metadata_airtable_responses(self, include_airtable_url, expected_individuals): + # Returns successfully without airtable data when disabled + response = self.client.get(include_airtable_url) + self.assertEqual(response.status_code, 200) + self._has_expected_metadata_response(response, expected_individuals) def assert_has_expected_calls(self, users, skip_group_call_idxs=None): @@ -789,6 +790,7 @@ class AnvilSummaryDataAPITest(AnvilAuthenticationTestCase, SummaryDataAPITest): fixtures = ['users', 'social_auth', '1kg_project', 'reference_data', 'report_variants'] NUM_MANAGER_SUBMISSIONS = 4 ADDITIONAL_SAMPLES = [] + HAS_AIRTABLE = True def test_mme_details(self, *args): super(AnvilSummaryDataAPITest, self).test_mme_details(*args) From 2b524e4aef71d9d6275538c169cc5357a98b56aa Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 17:17:02 -0400 Subject: [PATCH 676/736] metter check for reports --- seqr/views/apis/report_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 3e5329638a..72952a1fa5 100644 --- a/seqr/views/apis/report_api.py +++ 
b/seqr/views/apis/report_api.py @@ -31,8 +31,8 @@ MONDO_BASE_URL = 'https://monarchinitiative.org/v3/api/entity' -anvil_enabled_analyst_required = active_user_has_policies_and_passes_test( - lambda user: user_is_analyst(user) and anvil_enabled()) +airtable_enabled_analyst_required = active_user_has_policies_and_passes_test( + lambda user: user_is_analyst(user) and AirtableSession.is_airtable_enabled()) @pm_or_analyst_required @@ -115,7 +115,7 @@ def _get_sample_counts(sample_q, data_type_key='dataset_type'): ] -@anvil_enabled_analyst_required +@airtable_enabled_analyst_required def anvil_export(request, project_guid): project = get_project_and_check_permissions(project_guid, request.user) @@ -353,7 +353,7 @@ def _add_row(row, family_id, row_type): } -@anvil_enabled_analyst_required +@airtable_enabled_analyst_required def gregor_export(request): request_json = json.loads(request.body) missing_required_fields = [field for field in ['consentCode', 'deliveryPath'] if not request_json.get(field)] From 9291d4d6655226e6105290b1b78c680dd62ee66b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 18:13:08 -0400 Subject: [PATCH 677/736] send error handled basic notification for local installs --- seqr/utils/communication_utils.py | 6 ++- seqr/utils/search/add_data_utils.py | 64 ++++++++++++++++++----------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/seqr/utils/communication_utils.py b/seqr/utils/communication_utils.py index ef4291065d..366807d947 100644 --- a/seqr/utils/communication_utils.py +++ b/seqr/utils/communication_utils.py @@ -58,12 +58,16 @@ def send_html_email(email_body, process_message=None, **kwargs): def send_project_notification(project, notification, email, subject): users = project.subscribers.user_set.all() notify.send(project, recipient=users, verb=notification) - send_html_email( + email_kwargs = dict( email_body=BASE_EMAIL_TEMPLATE.format(email), to=list(users.values_list('email', flat=True)), subject=subject, 
process_message=_set_bulk_notification_stream, ) + try: + send_html_email(**email_kwargs) + except Exception as e: + logger.error(f'Error sending project email for {project.guid}: {e}', detail=email_kwargs) def _set_bulk_notification_stream(message): diff --git a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py index ece4f55d48..83b03eeeb0 100644 --- a/seqr/utils/search/add_data_utils.py +++ b/seqr/utils/search/add_data_utils.py @@ -42,51 +42,67 @@ def add_new_es_search_samples(request_json, project, user, notify=False, expecte ) if notify: - num_samples = len(sample_ids) - num_skipped updated_sample_data = updated_samples.values('sample_id', 'individual_id') - notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_sample_data, num_samples) + _basic_notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_sample_data) return inactivated_sample_guids, updated_family_guids, updated_samples -def notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, num_samples): - is_internal = not project_has_anvil(project) or is_internal_anvil_project(project) +def _format_email(sample_summary, project_link, *args): + return f'This is to notify you that {sample_summary} have been loaded in seqr project {project_link}' + +def _basic_notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, format_email=_format_email): previous_loaded_individuals = set(Sample.objects.filter(guid__in=inactivated_sample_guids).values_list('individual_id', flat=True)) new_sample_ids = [sample['sample_id'] for sample in updated_samples if sample['individual_id'] not in previous_loaded_individuals] url = f'{BASE_URL}project/{project.guid}/project_page' msg_dataset_type = '' if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS else f' {dataset_type}' - sample_id_list = f'\n```{", 
".join(sorted(new_sample_ids))}```' if is_internal else '' num_new_samples = len(new_sample_ids) sample_summary = f'{num_new_samples} new {sample_type}{msg_dataset_type} samples' - summary_message = f'{sample_summary} are loaded in {url}{sample_id_list}' + project_link = f'{project.name}' + email = format_email(sample_summary, project_link, num_new_samples) + + send_project_notification( + project, + notification=f'Loaded {sample_summary}', + email=email, + subject='New data available in seqr', + ) + + return sample_summary, new_sample_ids, url + + +def notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, num_samples): + is_internal = not project_has_anvil(project) or is_internal_anvil_project(project) + + if is_internal: + format_email = _format_email + else: + workspace_name = f'{project.workspace_namespace}/{project.workspace_name}' + def format_email(sample_summary, project_link, num_new_samples): + reload_summary = f' and {num_samples - num_new_samples} re-loaded samples' if num_samples > num_new_samples else '' + return '\n'.join([ + f'We are following up on the request to load data from AnVIL on {project.created_date.date().strftime("%B %d, %Y")}.', + f'We have loaded {sample_summary}{reload_summary} from the AnVIL workspace {workspace_name} to the corresponding seqr project {project_link}.', + 'Let us know if you have any questions.', + ]) + + sample_summary, new_sample_ids, url = _basic_notify_search_data_loaded( + project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, format_email=format_email, + ) + + sample_id_list = f'\n```{", ".join(sorted(new_sample_ids))}```' if is_internal else '' + summary_message = f'{sample_summary} are loaded in {url}{sample_id_list}' safe_post_to_slack( SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL if is_internal else SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, summary_message) - project_link = f'{project.name}' - if is_internal: - email = f'This is to notify 
you that {sample_summary} have been loaded in seqr project {project_link}' - else: + if not is_internal: AirtableSession(user=None, base=AirtableSession.ANVIL_BASE, no_auth=True).safe_patch_records( ANVIL_REQUEST_TRACKING_TABLE, max_records=1, record_or_filters={'Status': ['Loading', 'Loading Requested']}, record_and_filters={'AnVIL Project URL': url}, update={'Status': 'Available in Seqr'}, ) - workspace_name = f'{project.workspace_namespace}/{project.workspace_name}' - reload_summary = f' and {num_samples - num_new_samples} re-loaded samples' if num_samples > num_new_samples else '' - email = '\n'.join([ - f'We are following up on the request to load data from AnVIL on {project.created_date.date().strftime("%B %d, %Y")}.', - f'We have loaded {sample_summary}{reload_summary} from the AnVIL workspace {workspace_name} to the corresponding seqr project {project_link}.', - 'Let us know if you have any questions.', - ]) - - send_project_notification( - project, - notification=f'Loaded {sample_summary}', - email=email, - subject='New data available in seqr', - ) From 1226fd3d94842098bf3c7dbfca9271fec0f822d7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 26 Aug 2024 18:36:55 -0400 Subject: [PATCH 678/736] update tests --- seqr/utils/communication_utils.py | 2 +- seqr/views/apis/dataset_api_tests.py | 64 +++++++--------------------- 2 files changed, 16 insertions(+), 50 deletions(-) diff --git a/seqr/utils/communication_utils.py b/seqr/utils/communication_utils.py index 366807d947..25291ebf84 100644 --- a/seqr/utils/communication_utils.py +++ b/seqr/utils/communication_utils.py @@ -67,7 +67,7 @@ def send_project_notification(project, notification, email, subject): try: send_html_email(**email_kwargs) except Exception as e: - logger.error(f'Error sending project email for {project.guid}: {e}', detail=email_kwargs) + logger.error(f'Error sending project email for {project.guid}: {e}', extra={'detail': email_kwargs}) def _set_bulk_notification_stream(message): diff 
--git a/seqr/views/apis/dataset_api_tests.py b/seqr/views/apis/dataset_api_tests.py index 9c3d029212..721018a6f6 100644 --- a/seqr/views/apis/dataset_api_tests.py +++ b/seqr/views/apis/dataset_api_tests.py @@ -4,7 +4,6 @@ from datetime import datetime from django.urls.base import reverse from io import StringIO -import responses from seqr.models import Sample, Family from seqr.views.apis.dataset_api import add_variants_dataset_handler @@ -41,25 +40,17 @@ MOCK_OPEN = mock.MagicMock() MOCK_FILE_ITER = MOCK_OPEN.return_value.__enter__.return_value.__iter__ -MOCK_AIRTABLE_URL = 'http://testairtable' -MOCK_RECORD_ID = 'recH4SEO1CeoIlOiE' -MOCK_RECORDS = {'records': [{'id': MOCK_RECORD_ID, 'fields': {'Status': 'Loading'}}]} - @mock.patch('seqr.utils.redis_utils.redis.StrictRedis', lambda **kwargs: MOCK_REDIS) @mock.patch('seqr.utils.file_utils.open', MOCK_OPEN) class DatasetAPITest(object): @mock.patch('seqr.models.random.randint') - @mock.patch('seqr.utils.search.add_data_utils.safe_post_to_slack') + @mock.patch('seqr.utils.communication_utils.logger') @mock.patch('seqr.utils.communication_utils.send_html_email') - @mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_URL', MOCK_AIRTABLE_URL) @mock.patch('seqr.utils.search.add_data_utils.BASE_URL', 'https://seqr.broadinstitute.org/') - @mock.patch('seqr.utils.search.add_data_utils.SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL', 'anvil-data-loading') - @mock.patch('seqr.utils.search.add_data_utils.SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL', 'seqr-data-loading') @urllib3_responses.activate - @responses.activate - def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_random): + def test_add_variants_dataset(self, mock_send_email, mock_logger, mock_random): url = reverse(add_variants_dataset_handler, args=[PROJECT_GUID]) self.check_data_manager_login(url) @@ -81,12 +72,6 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando mock_random.return_value = 98765432101234567890 - 
airtable_tracking_url = f'{MOCK_AIRTABLE_URL}/appUelDNM3BnWaR7M/AnVIL%20Seqr%20Loading%20Requests%20Tracking' - responses.add( - responses.GET, - airtable_tracking_url + "?fields[]=Status&pageSize=2&filterByFormula=AND({AnVIL Project URL}='https://seqr.broadinstitute.org/project/R0004_non_analyst_project/project_page',OR(Status='Loading',Status='Loading Requested'))", - json=MOCK_RECORDS) - urllib3_responses.add_json('/{}/_mapping'.format(INDEX_NAME), MAPPING_JSON) urllib3_responses.add_json('/{}/_search?size=0'.format(INDEX_NAME), {'aggregations': { 'sample_ids': {'buckets': [{'key': 'NA19675'}, {'key': 'NA19679'}, {'key': 'NA19678_1'}, {'key': 'NA20878'}]} @@ -159,12 +144,11 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando self.assertTrue(existing_index_sample_model.is_active) self.assertTrue(str(existing_index_sample_model.loaded_date).startswith('2017-02-05')) - self._assert_expected_notification(mock_send_email, mock_send_slack, sample_type='WES', count=2, samples='NA19679, NA20878') + self._assert_expected_notification(mock_send_email, sample_type='WES', count=2) # Adding an SV index works additively with the regular variants index mock_random.return_value = 1234567 mock_send_email.reset_mock() - mock_send_slack.reset_mock() urllib3_responses.add_json('/{}/_mapping'.format(SV_INDEX_NAME), { SV_INDEX_NAME: {'mappings': {'_meta': { 'sampleType': 'WES', @@ -206,13 +190,11 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando {sample.guid for sample in sample_models}) self.assertSetEqual({True}, {sample.is_active for sample in sample_models}) - self._assert_expected_notification(mock_send_email, mock_send_slack, sample_type='WES SV', count=1, samples='NA19675_1') - self.assertEqual(len(responses.calls), 0) + self._assert_expected_notification(mock_send_email, sample_type='WES SV', count=1) # Adding an index for a different sample type works additively mock_random.return_value = 987654 
mock_send_email.reset_mock() - mock_send_slack.reset_mock() urllib3_responses.add_json('/{}/_mapping'.format(NEW_SAMPLE_TYPE_INDEX_NAME), { 'sub_index_1': {'mappings': {'_meta': { 'sampleType': 'WGS', @@ -247,7 +229,7 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando self.assertSetEqual(set(response_json['individualsByGuid']['I000001_na19675']['sampleGuids']), {sv_sample_guid, existing_index_sample_guid, new_sample_type_sample_guid}) - self._assert_expected_notification(mock_send_email, mock_send_slack, sample_type='WGS', count=1, samples='NA19675_1') + self._assert_expected_notification(mock_send_email, sample_type='WGS', count=1) # Previous variant samples should still be active sample_models = Sample.objects.filter(individual__guid='I000001_na19675') @@ -266,46 +248,32 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando }}, method=urllib3_responses.POST) mock_send_email.reset_mock() - mock_send_slack.reset_mock() + mock_send_email.side_effect = Exception('Email server is not configured') response = self.client.post(url, content_type='application/json', data=json.dumps({ 'elasticsearchIndex': INDEX_NAME, 'datasetType': 'SNV_INDEL', })) self.assertEqual(response.status_code, 200) - additional_kwargs = {'samples': 'NA21234'} - if not self.ANVIL_DISABLED: - namespace_path = 'ext-data/anvil-non-analyst-project 1000 Genomes Demo' - additional_kwargs['email_content'] = """We are following up on the request to load data from AnVIL on March 12, 2017. -We have loaded 1 new WES samples from the AnVIL workspace {anvil_link} to the corresponding seqr project {seqr_link}. 
-Let us know if you have any questions.""".format( - anvil_link=f'{namespace_path}', - seqr_link=f'Non-Analyst Project', - ) - additional_kwargs.update({'slack_channel': 'anvil-data-loading','samples': None}) - - self.assertEqual(responses.calls[1].request.url, f'{airtable_tracking_url}/{MOCK_RECORD_ID}') - self.assertEqual(responses.calls[1].request.method, 'PATCH') - self.assertDictEqual(json.loads(responses.calls[1].request.body), {'fields': {'Status': 'Available in Seqr'}}) - self._assert_expected_notification( - mock_send_email, mock_send_slack, sample_type='WES', count=1, project_guid=NON_ANALYST_PROJECT_GUID, - project_name='Non-Analyst Project', recipient='test_user_collaborator@test.com', **additional_kwargs, + mock_send_email, sample_type='WES', count=1, project_guid=NON_ANALYST_PROJECT_GUID, + project_name='Non-Analyst Project', recipient='test_user_collaborator@test.com', ) + mock_logger.error.assert_called_with( + 'Error sending project email for R0004_non_analyst_project: Email server is not configured', extra={'detail': { + 'email_body': mock.ANY, 'process_message': mock.ANY, + 'subject': 'New data available in seqr', 'to': ['test_user_collaborator@test.com'], + }}) - def _assert_expected_notification(self, mock_send_email, mock_send_slack, sample_type, count, samples, email_content=None, + def _assert_expected_notification(self, mock_send_email, sample_type, count, email_content=None, project_guid=PROJECT_GUID, project_name='1kg project nåme with uniçøde', - recipient='test_user_manager@test.com', slack_channel='seqr-data-loading'): + recipient='test_user_manager@test.com'): if not email_content: email_content = f'This is to notify you that {count} new {sample_type} samples have been loaded in seqr project {project_name}' mock_send_email.assert_called_once_with( email_body=f'Dear seqr user,\n\n{email_content}\n\nAll the best,\nThe seqr team', subject='New data available in seqr', to=[recipient], process_message=mock.ANY, ) - slack_message = 
f'{count} new {sample_type} samples are loaded in {SEQR_URL}/project/{project_guid}/project_page' - if samples: - slack_message = f'{slack_message}\n```{samples}```' - mock_send_slack.assert_called_with(slack_channel, slack_message) @urllib3_responses.activate def test_add_variants_dataset_errors(self): @@ -477,7 +445,6 @@ def _assert_expected_add_dataset_errors(self, url): # Tests for AnVIL access disabled class LocalDatasetAPITest(AuthenticationTestCase, DatasetAPITest): fixtures = ['users', '1kg_project'] - ANVIL_DISABLED = True def assert_no_anvil_calls(self): @@ -489,7 +456,6 @@ def assert_no_anvil_calls(self): # Test for permissions from AnVIL only class AnvilDatasetAPITest(AnvilAuthenticationTestCase, DatasetAPITest): fixtures = ['users', 'social_auth', '1kg_project'] - ANVIL_DISABLED = False def _assert_expected_add_dataset_errors(self, url): response = self.client.post(url, content_type='application/json', data=ADD_DATASET_PAYLOAD) From 83375beadce39799ca0df0f435d82ce273932fd6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 27 Aug 2024 11:09:14 -0400 Subject: [PATCH 679/736] move is_internal computation --- .../commands/check_for_new_samples_from_pipeline.py | 6 ++++-- seqr/utils/search/add_data_utils.py | 7 +------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 829ad706b8..ffa517cba3 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -15,6 +15,7 @@ from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type from seqr.views.utils.airtable_utils import AirtableSession, LOADABLE_PDO_STATUSES, AVAILABLE_PDO_STATUS from seqr.views.utils.dataset_utils import match_and_update_search_samples +from seqr.views.utils.permissions_utils import is_internal_anvil_project, project_has_anvil 
from seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ get_saved_variants from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, BASE_URL @@ -111,8 +112,9 @@ def handle(self, *args, **options): session = AirtableSession(user=None, no_auth=True) for project, sample_ids in samples_by_project.items(): project_sample_data = update_sample_data_by_project[project.id] - is_internal = notify_search_data_loaded( - project, dataset_type, sample_type, inactivated_sample_guids, + is_internal = not project_has_anvil(project) or is_internal_anvil_project(project) + notify_search_data_loaded( + project, is_internal, dataset_type, sample_type, inactivated_sample_guids, updated_samples=project_sample_data['samples'], num_samples=len(sample_ids), ) project_families = project_sample_data['family_guids'] diff --git a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py index 518a09faa0..e6035d2e39 100644 --- a/seqr/utils/search/add_data_utils.py +++ b/seqr/utils/search/add_data_utils.py @@ -4,7 +4,6 @@ from seqr.utils.search.elasticsearch.es_utils import validate_es_index_metadata_and_get_samples from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE from seqr.views.utils.dataset_utils import match_and_update_search_samples, load_mapping_file -from seqr.views.utils.permissions_utils import is_internal_anvil_project, project_has_anvil from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, BASE_URL, ANVIL_UI_URL, \ SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL @@ -74,9 +73,7 @@ def _basic_notify_search_data_loaded(project, dataset_type, sample_type, inactiv return sample_summary, new_sample_ids, url -def notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, num_samples): - is_internal = not project_has_anvil(project) or is_internal_anvil_project(project) - +def notify_search_data_loaded(project, is_internal, 
dataset_type, sample_type, inactivated_sample_guids, updated_samples, num_samples): if is_internal: format_email = _format_email else: @@ -106,5 +103,3 @@ def format_email(sample_summary, project_link, num_new_samples): record_and_filters={'AnVIL Project URL': url}, update={'Status': 'Available in Seqr'}, ) - - return is_internal \ No newline at end of file From 8ea93fd270332772654b9cac04df505c22a376b4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 27 Aug 2024 11:16:55 -0400 Subject: [PATCH 680/736] merge cleanup --- seqr/views/utils/test_utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 7546fcf832..ba39a01adf 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -546,9 +546,6 @@ def setUp(self): self.mock_get_group_members = patcher.start() self.mock_get_group_members.side_effect = get_group_members_side_effect self.addCleanup(patcher.stop) - patcher = mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_API_KEY', MOCK_AIRTABLE_KEY) - patcher.start() - self.addCleanup(patcher.stop) super(AnvilAuthenticationTestCase, self).setUp() @classmethod @@ -563,7 +560,6 @@ def assert_no_extra_anvil_calls(self): MOCK_AIRFLOW_URL = 'http://testairflowserver' -MOCK_AIRTABLE_KEY = 'airflow_access' DAG_NAME = 'LOADING_PIPELINE' PROJECT_GUID = 'R0001_1kg' From 99a2613876d6e81a23c14059574f009410e0c375 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 27 Aug 2024 11:20:34 -0400 Subject: [PATCH 681/736] rmove broad-specific message from UI --- ui/pages/DataManagement/components/LoadData.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/pages/DataManagement/components/LoadData.jsx b/ui/pages/DataManagement/components/LoadData.jsx index d4a32cf448..1b13eaab86 100644 --- a/ui/pages/DataManagement/components/LoadData.jsx +++ b/ui/pages/DataManagement/components/LoadData.jsx @@ -91,7 +91,7 @@ const LoadData = () => ( ) From 
c681d48b47d0d5316686c1738fe3be607633fc46 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 27 Aug 2024 11:32:02 -0400 Subject: [PATCH 682/736] basic updates for load_data endpoint --- seqr/views/apis/anvil_workspace_api.py | 4 ++-- seqr/views/apis/data_manager_api.py | 7 ++++--- seqr/views/utils/airflow_utils.py | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py index 49fb942c2a..449b5c9d93 100644 --- a/seqr/views/apis/anvil_workspace_api.py +++ b/seqr/views/apis/anvil_workspace_api.py @@ -18,7 +18,7 @@ from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE from seqr.utils.search.constants import VCF_FILE_EXTENSIONS from seqr.utils.search.utils import get_search_samples -from seqr.views.utils.airflow_utils import trigger_data_loading +from seqr.views.utils.airflow_utils import trigger_airflow_data_loading from seqr.views.utils.json_to_orm_utils import create_model_from_json from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.file_utils import load_uploaded_file @@ -302,7 +302,7 @@ def _trigger_add_workspace_data(project, pedigree_records, user, data_path, samp success_message = f""" *{user.email}* requested to load {num_updated_individuals} new{reload_summary} {sample_type} samples ({GENOME_VERSION_LOOKUP.get(project.genome_version)}) from AnVIL workspace *{project.workspace_namespace}/{project.workspace_name}* at {data_path} to seqr project <{_get_seqr_project_url(project)}|*{project.name}*> (guid: {project.guid})""" - trigger_success = trigger_data_loading( + trigger_success = trigger_airflow_data_loading( [project], sample_type, Sample.DATASET_TYPE_VARIANT_CALLS, data_path, user, success_message, SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, f'ERROR triggering AnVIL loading for project {project.guid}', genome_version=project.genome_version, diff --git a/seqr/views/apis/data_manager_api.py 
b/seqr/views/apis/data_manager_api.py index b233b1ce96..20dcc5e83f 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -21,7 +21,7 @@ from seqr.utils.middleware import ErrorsWarningsException from seqr.utils.vcf_utils import validate_vcf_exists -from seqr.views.utils.airflow_utils import trigger_data_loading, write_data_loading_pedigree +from seqr.views.utils.airflow_utils import trigger_airflow_data_loading, write_data_loading_pedigree from seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.dataset_utils import load_rna_seq, load_phenotype_prioritization_data_file, RNA_DATA_TYPE_CONFIGS, \ post_process_rna_data @@ -524,15 +524,16 @@ def load_data(request): additional_project_files = None individual_ids = None - if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: + if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS and AirtableSession.is_airtable_enabled(): individual_ids = _get_valid_project_samples(project_samples, sample_type, request.user) additional_project_files = { project_guid: (f'{project_guid}_ids', ['s'], [{'s': sample_id} for sample_id in sample_ids]) for project_guid, sample_ids in project_samples.items() } + # TODO add support for local trigger success_message = f'*{request.user.email}* triggered loading internal {sample_type} {dataset_type} data for {len(projects)} projects' - trigger_data_loading( + trigger_airflow_data_loading( project_models, sample_type, dataset_type, request_json['filePath'], request.user, success_message, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, f'ERROR triggering internal {sample_type} {dataset_type} loading', is_internal=True, individual_ids=individual_ids, additional_project_files=additional_project_files, diff --git a/seqr/views/utils/airflow_utils.py b/seqr/views/utils/airflow_utils.py index c8a84de65a..3d4ee9cbfa 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -26,10 +26,10 @@ class 
DagRunningException(Exception): pass -def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type: str, data_path: str, user: User, - success_message: str, success_slack_channel: str, error_message: str, - genome_version: str = GENOME_VERSION_GRCh38, is_internal: bool = False, - individual_ids: list[str] = None, additional_project_files: dict = None): +def trigger_airflow_data_loading(projects: list[Project], sample_type: str, dataset_type: str, data_path: str, user: User, + success_message: str, success_slack_channel: str, error_message: str, + genome_version: str = GENOME_VERSION_GRCh38, is_internal: bool = False, + individual_ids: list[str] = None, additional_project_files: dict = None): success = True project_guids = sorted([p.guid for p in projects]) From f3a554afd35e010c0a78d9e7eeabd7cab4d81bb4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 27 Aug 2024 11:41:34 -0400 Subject: [PATCH 683/736] do not write sample id subset file --- seqr/views/apis/data_manager_api.py | 7 +------ seqr/views/utils/airflow_utils.py | 20 +++++--------------- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 5578ec10c1..27d387c755 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -515,20 +515,15 @@ def load_data(request): missing = sorted(set(project_samples.keys()) - {p.guid for p in project_models}) return create_json_response({'error': f'The following projects are invalid: {", ".join(missing)}'}, status=400) - additional_project_files = None individual_ids = None if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: individual_ids = _get_valid_project_samples(project_samples, sample_type, request.user) - additional_project_files = { - project_guid: (f'{project_guid}_ids', ['s'], [{'s': sample_id} for sample_id in sample_ids]) - for project_guid, sample_ids in project_samples.items() - } success_message = 
f'*{request.user.email}* triggered loading internal {sample_type} {dataset_type} data for {len(projects)} projects' trigger_data_loading( project_models, sample_type, dataset_type, request_json['filePath'], request.user, success_message, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, f'ERROR triggering internal {sample_type} {dataset_type} loading', - is_internal=True, individual_ids=individual_ids, additional_project_files=additional_project_files, + is_internal=True, individual_ids=individual_ids, ) return create_json_response({'success': True}) diff --git a/seqr/views/utils/airflow_utils.py b/seqr/views/utils/airflow_utils.py index c8a84de65a..523258dfe6 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -29,7 +29,7 @@ class DagRunningException(Exception): def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type: str, data_path: str, user: User, success_message: str, success_slack_channel: str, error_message: str, genome_version: str = GENOME_VERSION_GRCh38, is_internal: bool = False, - individual_ids: list[str] = None, additional_project_files: dict = None): + individual_ids: list[str] = None): success = True project_guids = sorted([p.guid for p in projects]) @@ -44,7 +44,7 @@ def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type upload_info = _upload_data_loading_files( projects, is_internal, user, genome_version, sample_type, dataset_type=dataset_type, - individual_ids=individual_ids, additional_project_files=additional_project_files, + individual_ids=individual_ids, ) try: @@ -112,7 +112,7 @@ def _dag_dataset_type(sample_type: str, dataset_type: str): def _upload_data_loading_files(projects: list[Project], is_internal: bool, user: User, genome_version: str, sample_type: str, dataset_type: str = None, callset: str = 'Internal', - individual_ids: list[str] = None, additional_project_files: dict = None): + individual_ids: list[str] = None): file_annotations = OrderedDict({ 
'Project_GUID': F('family__project__guid'), 'Family_GUID': F('family__guid'), 'Family_ID': F('family__family_id'), @@ -132,8 +132,8 @@ def _upload_data_loading_files(projects: list[Project], is_internal: bool, for project_guid, rows in data_by_project.items(): gs_path = _get_dag_project_gs_path(project_guid, genome_version, sample_type, is_internal, callset) try: - files, file_suffixes = _parse_project_upload_files(project_guid, rows, file_annotations.keys(), additional_project_files) - write_multiple_files_to_gs(files, gs_path, user, file_format='tsv', file_suffixes=file_suffixes) + files = [(f'{project_guid}_pedigree', file_annotations.keys(), rows)] + write_multiple_files_to_gs(files, gs_path, user, file_format='tsv') if dataset_type: additional_gs_path = _get_gs_pedigree_path(genome_version, sample_type, dataset_type) run_gsutil_with_wait(f'rsync -r {gs_path}', additional_gs_path, user) @@ -144,16 +144,6 @@ def _upload_data_loading_files(projects: list[Project], is_internal: bool, return info -def _parse_project_upload_files(project_guid, rows, header, additional_project_files): - files = [(f'{project_guid}_pedigree', header, rows)] - file_suffixes = None - additional_file = additional_project_files and additional_project_files.get(project_guid) - if additional_file: - files.append(additional_file) - file_suffixes = {additional_file[0]: 'txt'} - return files, file_suffixes - - def _get_dag_project_gs_path(project: str, genome_version: str, sample_type: str, is_internal: bool, callset: str): dag_name = f'RDG_{sample_type}_Broad_{callset}' if is_internal else f'AnVIL_{sample_type}' dag_path = f'{SEQR_V2_DATASETS_GS_PATH}/{GENOME_VERSION_LOOKUP[genome_version]}/{dag_name}' From dff157961d8eb79c84b2a6536797a586e93faf78 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 27 Aug 2024 13:42:48 -0400 Subject: [PATCH 684/736] only upload pedigrees to one location --- seqr/views/apis/anvil_workspace_api_tests.py | 15 +++------ seqr/views/apis/data_manager_api_tests.py | 
33 ++++++-------------- seqr/views/utils/airflow_utils.py | 32 +++++++++---------- 3 files changed, 28 insertions(+), 52 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index a72939cac6..eabb0b37d2 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -533,9 +533,6 @@ def setUp(self): self.mock_mv_file = patcher.start() self.mock_mv_file.return_value = True self.addCleanup(patcher.stop) - patcher = mock.patch('seqr.views.utils.airflow_utils.run_gsutil_with_wait') - self.mock_gsutil = patcher.start() - self.addCleanup(patcher.stop) patcher = mock.patch('seqr.views.utils.export_utils.TemporaryDirectory') mock_tempdir = patcher.start() mock_tempdir.return_value.__enter__.return_value = TEMP_PATH @@ -757,15 +754,11 @@ def _assert_valid_operation(self, project, test_add_data=True): '\n'.join(['\t'.join(row) for row in [header] + rows]) ) - gs_path = f'gs://seqr-datasets/v02/{genome_version}/AnVIL_WES/{project.guid}/base/' + gs_path = f'gs://seqr-loading-temp/v3.1/{genome_version}/SNV_INDEL/pedigrees/WES/' self.mock_mv_file.assert_called_with( f'{TEMP_PATH}/*', gs_path, self.manager_user ) - self.mock_gsutil.assert_called_with( - f'rsync -r {gs_path}', f'gs://seqr-loading-temp/v3.1/{genome_version}/SNV_INDEL/pedigrees/WES/', self.manager_user, - ) - self.assert_airflow_calls(additional_tasks_check=test_add_data) # create airtable record @@ -794,7 +787,7 @@ def _assert_valid_operation(self, project, test_add_data=True): *test_user_manager@test.com* requested to load {sample_summary} WES samples ({version}) from AnVIL workspace *my-seqr-billing/{workspace_name}* at gs://test_bucket/test_path.vcf to seqr project (guid: {guid}) - Pedigree file has been uploaded to gs://seqr-datasets/v02/{version}/AnVIL_WES/{guid}/base/ + Pedigree files have been uploaded to gs://seqr-loading-temp/v3.1/{version}/SNV_INDEL/pedigrees/WES/ DAG LOADING_PIPELINE is 
triggered with following: ```{dag}``` @@ -848,8 +841,8 @@ def _test_mv_file_and_triggering_dag_exception(self, url, workspace, sample_data project = Project.objects.get(**workspace) self.mock_airflow_logger.error.assert_called_with( - 'Uploading Pedigree to Google Storage failed. Errors: Something wrong while moving the file.', - self.manager_user, detail=sample_data) + 'Uploading Pedigrees to Google Storage failed. Errors: Something wrong while moving the file.', + self.manager_user, detail={f'{project.guid}_pedigree': sample_data}) self.mock_api_logger.error.assert_not_called() self.mock_airflow_logger.warning.assert_called_with( 'LOADING_PIPELINE DAG is running and cannot be triggered again.', self.manager_user) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 68fbcf59d2..eaf5ad9ea8 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1630,9 +1630,7 @@ def test_load_data(self, mock_temp_dir, mock_open): }""" message = f"""*test_pm_user@test.com* triggered loading internal WGS MITO data for 2 projects - Pedigree file has been uploaded to gs://seqr-datasets/v02/GRCh38/RDG_WGS_Broad_Internal/base/projects/R0001_1kg/ - - Pedigree file has been uploaded to gs://seqr-datasets/v02/GRCh38/RDG_WGS_Broad_Internal/base/projects/R0004_non_analyst_project/ + Pedigree files have been uploaded to gs://seqr-loading-temp/v3.1/GRCh38/MITO/pedigrees/WGS/ DAG LOADING_PIPELINE is triggered with following: ```{dag_json}``` @@ -1643,6 +1641,7 @@ def test_load_data(self, mock_temp_dir, mock_open): self.set_dag_trigger_error_response(status=400) self.mock_authorized_session.reset_mock() self.mock_slack.reset_mock() + self.mock_subprocess.reset_mock() mock_open.reset_mock() responses.calls.reset() mock_subprocess.reset_mock() @@ -1710,6 +1709,7 @@ def test_load_data(self, mock_temp_dir, mock_open): fields=['SeqrCollaboratorSampleID', 'PDOStatus', 'SeqrProject'], ) + 
self.mock_subprocess.reset_mock() mock_subprocess.reset_mock() responses.calls.reset() responses.add(responses.GET, airtable_samples_url, json=AIRTABLE_SAMPLE_RECORDS, status=200) @@ -1736,32 +1736,19 @@ def _has_expected_gs_calls(self, mock_open, dataset_type, sample_type='WGS', has [row.split('\t') for row in write_call.args[0].split('\n')] for write_call in mock_open.return_value.__enter__.return_value.write.call_args_list ] - self.assertEqual(len(files), 4 if has_project_subset else 2) - if has_project_subset: - self.assertEqual(len(files[1]), 4) - self.assertListEqual(files[1], [['s'], ['NA19675_1'], ['NA19679'], ['NA19678']]) - self.assertEqual(len(files[3]), 3) - self.assertListEqual(files[3], [['s'], ['NA21234'], ['NA21987']]) + self.assertEqual(len(files), 2) num_rows = 4 if has_project_subset else 15 self.assertEqual(len(files[0]), num_rows) self.assertListEqual(files[0][:5], [PEDIGREE_HEADER] + EXPECTED_PEDIGREE_ROWS[:num_rows-1]) - ped_file = files[2 if has_project_subset else 1] - self.assertEqual(len(ped_file), 3) - self.assertListEqual(ped_file, [ + self.assertEqual(len(files[1]), 3) + self.assertListEqual(files[1], [ PEDIGREE_HEADER, ['R0004_non_analyst_project', 'F000014_14', '14', 'NA21234', '', '', 'F'], ['R0004_non_analyst_project', 'F000014_14', '14', 'NA21987', '', '', 'M'], ]) - self.mock_subprocess.assert_has_calls([ - mock.call( - f'gsutil mv /mock/tmp/* gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project}/', - stdout=-1, stderr=-2, shell=True, # nosec - ) for project in self.PROJECTS - ] + [ - mock.call( - f'gsutil rsync -r gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project}/ gs://seqr-loading-temp/v3.1/GRCh38/{dataset_type}/pedigrees/{sample_type}/', - stdout=-1, stderr=-2, shell=True, # nosec - ) for project in self.PROJECTS - ], any_order=True) + self.mock_subprocess.assert_called_once_with( + f'gsutil mv /mock/tmp/* 
gs://seqr-loading-temp/v3.1/GRCh38/{dataset_type}/pedigrees/{sample_type}/', + stdout=-1, stderr=-2, shell=True, # nosec + ) diff --git a/seqr/views/utils/airflow_utils.py b/seqr/views/utils/airflow_utils.py index 523258dfe6..5bfa051cc4 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -9,7 +9,7 @@ from reference_data.models import GENOME_VERSION_GRCh38, GENOME_VERSION_LOOKUP from seqr.models import Individual, Sample, Project from seqr.utils.communication_utils import safe_post_to_slack -from seqr.utils.file_utils import does_file_exist, run_gsutil_with_wait +from seqr.utils.file_utils import does_file_exist from seqr.utils.logging_utils import SeqrLogger from seqr.views.utils.export_utils import write_multiple_files_to_gs from settings import AIRFLOW_WEBSERVER_URL, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL @@ -42,10 +42,7 @@ def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type 'reference_genome': GENOME_VERSION_LOOKUP[genome_version], } - upload_info = _upload_data_loading_files( - projects, is_internal, user, genome_version, sample_type, dataset_type=dataset_type, - individual_ids=individual_ids, - ) + upload_info = _upload_data_loading_files(projects, user, genome_version, sample_type, dataset_type, individual_ids) try: _check_dag_running_state() @@ -64,6 +61,7 @@ def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type def write_data_loading_pedigree(project: Project, user: User): + # TODO fix all of this match = next(( (callset, sample_type) for callset, sample_type in itertools.product(['Internal', 'External', 'AnVIL'], ['WGS', 'WES']) if does_file_exist(_get_dag_project_gs_path( @@ -110,8 +108,7 @@ def _dag_dataset_type(sample_type: str, dataset_type: str): else dataset_type -def _upload_data_loading_files(projects: list[Project], is_internal: bool, - user: User, genome_version: str, sample_type: str, dataset_type: str = None, callset: str = 'Internal', +def 
_upload_data_loading_files(projects: list[Project], user: User, genome_version: str, sample_type: str, dataset_type: str, individual_ids: list[str] = None): file_annotations = OrderedDict({ 'Project_GUID': F('family__project__guid'), 'Family_GUID': F('family__guid'), @@ -129,17 +126,16 @@ def _upload_data_loading_files(projects: list[Project], is_internal: bool, data_by_project[row.pop('project')].append(row) info = [] - for project_guid, rows in data_by_project.items(): - gs_path = _get_dag_project_gs_path(project_guid, genome_version, sample_type, is_internal, callset) - try: - files = [(f'{project_guid}_pedigree', file_annotations.keys(), rows)] - write_multiple_files_to_gs(files, gs_path, user, file_format='tsv') - if dataset_type: - additional_gs_path = _get_gs_pedigree_path(genome_version, sample_type, dataset_type) - run_gsutil_with_wait(f'rsync -r {gs_path}', additional_gs_path, user) - except Exception as e: - logger.error(f'Uploading Pedigree to Google Storage failed. Errors: {e}', user, detail=rows) - info.append(f'Pedigree file has been uploaded to {gs_path}') + header = list(file_annotations.keys()) + files = [(f'{project_guid}_pedigree', header, rows) for project_guid, rows in data_by_project.items()] + gs_path = _get_gs_pedigree_path(genome_version, sample_type, dataset_type) + try: + write_multiple_files_to_gs(files, gs_path, user, file_format='tsv') + except Exception as e: + logger.error(f'Uploading Pedigrees to Google Storage failed. 
Errors: {e}', user, detail={ + project: rows for project, _, rows in files + }) + info.append(f'Pedigree files have been uploaded to {gs_path}') return info From 663710e826fd3946ead294a2ba0e4a614d1986c2 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 27 Aug 2024 13:49:53 -0400 Subject: [PATCH 685/736] remove write pedigree functionality --- seqr/urls.py | 3 +- seqr/views/apis/data_manager_api.py | 13 +--- seqr/views/apis/data_manager_api_tests.py | 72 +------------------ seqr/views/utils/airflow_utils.py | 26 +------ ui/pages/DataManagement/DataManagement.jsx | 2 - .../components/WritePedigree.jsx | 21 ------ 6 files changed, 4 insertions(+), 133 deletions(-) delete mode 100644 ui/pages/DataManagement/components/WritePedigree.jsx diff --git a/seqr/urls.py b/seqr/urls.py index a56da19889..52e180c1ae 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -121,7 +121,7 @@ forgot_password from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ - update_rna_seq, load_rna_seq_sample_data, proxy_to_kibana, load_phenotype_prioritization_data, write_pedigree, \ + update_rna_seq, load_rna_seq_sample_data, proxy_to_kibana, load_phenotype_prioritization_data, \ validate_callset, get_loaded_projects, load_data from seqr.views.apis.report_api import \ anvil_export, \ @@ -333,7 +333,6 @@ 'data_management/update_rna_seq': update_rna_seq, 'data_management/load_rna_seq_sample/(?P[^/]+)': load_rna_seq_sample_data, 'data_management/load_phenotype_prioritization_data': load_phenotype_prioritization_data, - 'data_management/write_pedigree/(?P[^/]+)': write_pedigree, 'data_management/validate_callset': validate_callset, 'data_management/loaded_projects/(?P[^/]+)/(?P[^/]+)': get_loaded_projects, 'data_management/load_data': load_data, diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 27d387c755..6835db449f 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py 
@@ -21,7 +21,7 @@ from seqr.utils.middleware import ErrorsWarningsException from seqr.utils.vcf_utils import validate_vcf_exists -from seqr.views.utils.airflow_utils import trigger_data_loading, write_data_loading_pedigree +from seqr.views.utils.airflow_utils import trigger_data_loading from seqr.views.utils.airtable_utils import AirtableSession, LOADABLE_PDO_STATUSES, AVAILABLE_PDO_STATUS from seqr.views.utils.dataset_utils import load_rna_seq, load_phenotype_prioritization_data_file, RNA_DATA_TYPE_CONFIGS, \ post_process_rna_data, convert_django_meta_to_http_headers @@ -431,17 +431,6 @@ def load_phenotype_prioritization_data(request): }) -@data_manager_required -def write_pedigree(request, project_guid): - project = Project.objects.get(guid=project_guid) - try: - write_data_loading_pedigree(project, request.user) - except ValueError as e: - return create_json_response({'error': str(e)}, status=400) - - return create_json_response({'success': True}) - - DATA_TYPE_FILE_EXTS = { Sample.DATASET_TYPE_MITO_CALLS: ('.mt',), Sample.DATASET_TYPE_SV_CALLS: ('.bed', '.bed.gz'), diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index eaf5ad9ea8..35cfde5199 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -8,7 +8,7 @@ from seqr.utils.communication_utils import _set_bulk_notification_stream from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ - update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, write_pedigree, validate_callset, \ + update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, validate_callset, \ get_loaded_projects, load_data from seqr.views.utils.orm_to_json_utils import _get_json_for_models from seqr.views.utils.test_utils import AuthenticationTestCase, AirflowTestCase, AirtableTest @@ -1361,76 +1361,6 @@ def _assert_expected_notifications(mock_send_email, 
expected_notifs: list[dict], ) mock_send_email.assert_has_calls(calls) - @staticmethod - def _ls_subprocess_calls(file, is_error=True): - calls = [ - mock.call(f'gsutil ls {file}',stdout=-1, stderr=-2, shell=True), # nosec - mock.call().wait(), - ] - if is_error: - calls.append(mock.call().stdout.__iter__()) - return calls - - @mock.patch('seqr.views.utils.export_utils.open') - @mock.patch('seqr.views.utils.export_utils.TemporaryDirectory') - @mock.patch('seqr.utils.file_utils.subprocess.Popen') - def test_write_pedigree(self, mock_subprocess, mock_temp_dir, mock_open): - mock_temp_dir.return_value.__enter__.return_value = '/mock/tmp' - mock_subprocess.return_value.wait.return_value = 1 - - url = reverse(write_pedigree, args=[PROJECT_GUID]) - self.check_data_manager_login(url) - - response = self.client.get(url) - self.assertEqual(response.status_code, 400) - self.assertEqual(response.json()['error'], f'No gs://seqr-datasets/v02 project directory found for {PROJECT_GUID}') - - project_directory_paths = [ - 'gs://seqr-datasets/v02/GRCh37/RDG_WGS_Broad_Internal/base/projects/R0001_1kg/', - 'gs://seqr-datasets/v02/GRCh37/RDG_WES_Broad_Internal/base/projects/R0001_1kg/', - 'gs://seqr-datasets/v02/GRCh37/RDG_WGS_Broad_External/base/projects/R0001_1kg/', - 'gs://seqr-datasets/v02/GRCh37/RDG_WES_Broad_External/base/projects/R0001_1kg/', - 'gs://seqr-datasets/v02/GRCh37/AnVIL_WGS/R0001_1kg/base/', - 'gs://seqr-datasets/v02/GRCh37/AnVIL_WES/R0001_1kg/base/', - ] - expected_calls = [] - for path in project_directory_paths: - expected_calls += self._ls_subprocess_calls(path) - mock_subprocess.assert_has_calls(expected_calls) - - # Test success - self._test_write_success( - 'gs://seqr-datasets/v02/GRCh37/RDG_WES_Broad_Internal/base/projects/R0001_1kg/', - url, mock_subprocess, mock_open, project_directory_paths, - ) - self._test_write_success( - 'gs://seqr-datasets/v02/GRCh37/AnVIL_WES/R0001_1kg/base/', - url, mock_subprocess, mock_open, project_directory_paths, - ) - - def 
_test_write_success(self, success_path, url, mock_subprocess, mock_open, project_directory_paths): - success_index = project_directory_paths.index(success_path) - mock_subprocess.reset_mock() - mock_subprocess.return_value.wait.side_effect = [1 for _ in range(success_index)] + [0, 0] - response = self.client.get(url) - self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {'success': True}) - - mock_open.assert_called_with(f'/mock/tmp/{PROJECT_GUID}_pedigree.tsv', 'w') - write_call = mock_open.return_value.__enter__.return_value.write.call_args.args[0] - file = [row.split('\t') for row in write_call.split('\n')] - self.assertEqual(len(file), 15) - self.assertListEqual(file[:5], [PEDIGREE_HEADER] + EXPECTED_PEDIGREE_ROWS) - - expected_calls = [] - for path in project_directory_paths[:success_index]: - expected_calls += self._ls_subprocess_calls(path) - expected_calls += self._ls_subprocess_calls(success_path, is_error=False) + [ - mock.call('gsutil mv /mock/tmp/* ' + success_path, stdout=-1, stderr=-2, shell=True), # nosec - mock.call().wait(), - ] - mock_subprocess.assert_has_calls(expected_calls) - @mock.patch('seqr.utils.file_utils.subprocess.Popen') def test_validate_callset(self, mock_subprocess): url = reverse(validate_callset) diff --git a/seqr/views/utils/airflow_utils.py b/seqr/views/utils/airflow_utils.py index 5bfa051cc4..dfdbbcfd3d 100644 --- a/seqr/views/utils/airflow_utils.py +++ b/seqr/views/utils/airflow_utils.py @@ -3,13 +3,11 @@ from django.db.models import F import google.auth from google.auth.transport.requests import AuthorizedSession -import itertools import json from reference_data.models import GENOME_VERSION_GRCh38, GENOME_VERSION_LOOKUP from seqr.models import Individual, Sample, Project from seqr.utils.communication_utils import safe_post_to_slack -from seqr.utils.file_utils import does_file_exist from seqr.utils.logging_utils import SeqrLogger from seqr.views.utils.export_utils import 
write_multiple_files_to_gs from settings import AIRFLOW_WEBSERVER_URL, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL @@ -60,22 +58,6 @@ def trigger_data_loading(projects: list[Project], sample_type: str, dataset_type return success -def write_data_loading_pedigree(project: Project, user: User): - # TODO fix all of this - match = next(( - (callset, sample_type) for callset, sample_type in itertools.product(['Internal', 'External', 'AnVIL'], ['WGS', 'WES']) - if does_file_exist(_get_dag_project_gs_path( - project.guid, project.genome_version, sample_type, is_internal=callset != 'AnVIL', callset=callset, - ))), None) - if not match: - raise ValueError(f'No {SEQR_V2_DATASETS_GS_PATH} project directory found for {project.guid}') - callset, sample_type = match - _upload_data_loading_files( - [project], is_internal=callset != 'AnVIL', user=user, genome_version=project.genome_version, - sample_type=sample_type, callset=callset, - ) - - def _send_load_data_slack_msg(messages: list[str], channel: str, dag: dict): message = '\n\n '.join(messages) message_content = f"""{message} @@ -109,7 +91,7 @@ def _dag_dataset_type(sample_type: str, dataset_type: str): def _upload_data_loading_files(projects: list[Project], user: User, genome_version: str, sample_type: str, dataset_type: str, - individual_ids: list[str] = None): + individual_ids: list[str]): file_annotations = OrderedDict({ 'Project_GUID': F('family__project__guid'), 'Family_GUID': F('family__guid'), 'Family_ID': F('family__family_id'), @@ -140,12 +122,6 @@ def _upload_data_loading_files(projects: list[Project], user: User, genome_versi return info -def _get_dag_project_gs_path(project: str, genome_version: str, sample_type: str, is_internal: bool, callset: str): - dag_name = f'RDG_{sample_type}_Broad_{callset}' if is_internal else f'AnVIL_{sample_type}' - dag_path = f'{SEQR_V2_DATASETS_GS_PATH}/{GENOME_VERSION_LOOKUP[genome_version]}/{dag_name}' - return f'{dag_path}/base/projects/{project}/' if is_internal else 
f'{dag_path}/{project}/base/' - - def _get_gs_pedigree_path(genome_version: str, sample_type: str, dataset_type: str): return f'{SEQR_V3_PEDIGREE_GS_PATH}/{GENOME_VERSION_LOOKUP[genome_version]}/{dataset_type}/pedigrees/{sample_type}/' diff --git a/ui/pages/DataManagement/DataManagement.jsx b/ui/pages/DataManagement/DataManagement.jsx index 51a31bd36b..39dec92df8 100644 --- a/ui/pages/DataManagement/DataManagement.jsx +++ b/ui/pages/DataManagement/DataManagement.jsx @@ -14,7 +14,6 @@ import RnaSeq from './components/RnaSeq' import SampleQc from './components/SampleQc' import Users from './components/Users' import PhenotypePrioritization from './components/PhenotypePrioritization' -import WritePedigree from './components/WritePedigree' const IFRAME_STYLE = { position: 'fixed', left: '0', top: '95px' } @@ -28,7 +27,6 @@ const DATA_MANAGEMENT_PAGES = [ ...PM_DATA_MANAGEMENT_PAGES, { path: 'sample_qc', component: SampleQc }, { path: 'users', component: Users }, - { path: 'write_pedigree', component: WritePedigree }, { path: 'phenotype_prioritization', component: PhenotypePrioritization }, ] diff --git a/ui/pages/DataManagement/components/WritePedigree.jsx b/ui/pages/DataManagement/components/WritePedigree.jsx deleted file mode 100644 index 5d33dfdabb..0000000000 --- a/ui/pages/DataManagement/components/WritePedigree.jsx +++ /dev/null @@ -1,21 +0,0 @@ -import React from 'react' -import PropTypes from 'prop-types' -import { Button, Segment } from 'semantic-ui-react' - -import DispatchRequestButton from 'shared/components/buttons/DispatchRequestButton' -import ProjectSelector from 'shared/components/page/ProjectSelector' -import { HttpRequestHelper } from 'shared/utils/httpRequestHelper' - -const onSubmit = projectGuid => () => new HttpRequestHelper(`/api/data_management/write_pedigree/${projectGuid}`).get() - -const WritePedigree = ({ project }) => (project ? ( - }> -