diff --git a/reference_data/management/commands/update_human_phenotype_ontology.py b/reference_data/management/commands/update_human_phenotype_ontology.py index 76f50893bc..16e3bddcd1 100644 --- a/reference_data/management/commands/update_human_phenotype_ontology.py +++ b/reference_data/management/commands/update_human_phenotype_ontology.py @@ -1,5 +1,6 @@ import logging import os +import re from tqdm import tqdm from django.db import transaction @@ -79,7 +80,11 @@ def parse_obo_file(file_iterator): 'is_category': False, } elif line.startswith("is_a: "): - is_a = value.split(" ! ")[0] + # Match the HPO id directly; OBO modifiers ({...}) and comments (! ...) may appear in any order around it. + match = re.search(r'HP:\d{7}', value) + if not match: + raise ValueError("is_a line missing HPO id: %s" % line) + is_a = match.group(0) if is_a == "HP:0000118": hpo_id_to_record[hpo_id]['is_category'] = True hpo_id_to_record[hpo_id]['parent_id'] = is_a diff --git a/reference_data/management/tests/update_hpo_tests.py b/reference_data/management/tests/update_hpo_tests.py index e3cf107312..3e5b5edad7 100644 --- a/reference_data/management/tests/update_hpo_tests.py +++ b/reference_data/management/tests/update_hpo_tests.py @@ -81,6 +81,21 @@ 'synonym: "Head and neck abnormality" EXACT layperson []\n', 'xref: UMLS:C4021817\n', 'is_a: HP:0000118 ! Phenotypic abnormality\n', + '\n', + '[Term]\n', # is_a with xref and no label + 'id: HP:9999001\n', + 'name: trailer with xref only\n', + 'is_a: HP:0000118 {xref="PMID:31677808"}\n', + '\n', + '[Term]\n', # is_a with label and xref (label then modifier) + 'id: HP:9999002\n', + 'name: trailer with label and xref\n', + 'is_a: HP:0000118 ! Phenotypic abnormality {xref="PMID:31677808"}\n', + '\n', + '[Term]\n', # is_a with xref then label (OBO 1.4 standard ordering) + 'id: HP:9999003\n', + 'name: trailer with xref then label\n', + 'is_a: HP:0000118 {xref="PMID:31677808"} ! Phenotypic abnormality\n', ] EXPECTED_DB_DATA = { @@ -123,7 +138,31 @@ 'parent_id': 'HP:0000001', 'hpo_id': 'HP:0000003', 'category_id': None - } + }, + 'HP:9999001': { + 'is_category': True, + 'definition': None, + 'name': 'trailer with xref only', + 'parent_id': 'HP:0000118', + 'hpo_id': 'HP:9999001', + 'category_id': 'HP:9999001', + }, + 'HP:9999002': { + 'is_category': True, + 'definition': None, + 'name': 'trailer with label and xref', + 'parent_id': 'HP:0000118', + 'hpo_id': 'HP:9999002', + 'category_id': 'HP:9999002', + }, + 'HP:9999003': { + 'is_category': True, + 'definition': None, + 'name': 'trailer with xref then label', + 'parent_id': 'HP:0000118', + 'hpo_id': 'HP:9999003', + 'category_id': 'HP:9999003', + }, } class UpdateHpoTest(TestCase): @@ -152,7 +191,7 @@ def test_update_hpo_command(self, mock_tempfile, mock_logger): call_command('update_human_phenotype_ontology') calls = [ - mock.call('Deleting HumanPhenotypeOntology table with 12 records and creating new table with 5 records'), + mock.call('Deleting HumanPhenotypeOntology table with 12 records and creating new table with 8 records'), mock.call('Done'), ] mock_logger.info.assert_has_calls(calls) @@ -163,7 +202,7 @@ def test_update_hpo_command(self, mock_tempfile, mock_logger): call_command('update_human_phenotype_ontology', tmp_file) calls = [ - mock.call('Deleting HumanPhenotypeOntology table with 5 records and creating new table with 5 records'), + mock.call('Deleting HumanPhenotypeOntology table with 8 records and creating new table with 8 records'), mock.call('Done'), ] mock_logger.info.assert_has_calls(calls)