Skip to content

Commit e414581

Browse files
author
Oliver Scott
committed
added more basic analysis functions
1 parent c1d4dc0 commit e414581

1 file changed

Lines changed: 102 additions & 4 deletions

File tree

scaffoldgraph/core/graph.py

Lines changed: 102 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
88
from collections import Counter
99

1010
import tqdm
11+
import networkx as nx
12+
1113
from loguru import logger
12-
from networkx import DiGraph
1314
from rdkit import RDLogger
1415
from rdkit.Chem import rdMolHash, MolToSmiles, rdmolops
1516
from rdkit.Chem.rdMolDescriptors import CalcNumRings
1617

1718
from scaffoldgraph.io import *
19+
from scaffoldgraph.utils import canonize_smiles
1820
from .fragment import get_murcko_scaffold, get_annotated_murcko_scaffold
1921
from .scaffold import Scaffold
2022

@@ -28,7 +30,7 @@ def init_molecule_name(mol):
2830
mol.SetProp('_Name', n)
2931

3032

31-
class ScaffoldGraph(DiGraph, ABC):
33+
class ScaffoldGraph(nx.DiGraph, ABC):
3234
"""Abstract base class for ScaffoldGraphs"""
3335

3436
def __init__(self, graph=None, fragmenter=None):
@@ -92,29 +94,125 @@ def _recursive_constructor(self, child):
9294
@property
def num_scaffold_nodes(self):
    """Return the number of scaffolds in the graph.

    The generator returned by get_scaffold_nodes() is consumed directly,
    so no intermediate list of node keys is built.
    """
    return sum(1 for _ in self.get_scaffold_nodes())
96101

97102
@property
def num_molecule_nodes(self):
    """Return the number of molecules in the graph.

    The generator returned by get_molecule_nodes() is consumed directly,
    so no intermediate list of node keys is built.
    """
    return sum(1 for _ in self.get_molecule_nodes())
101109

102110
def get_scaffold_nodes(self, data=False):
    """Return a generator over the scaffold nodes of the graph.

    Parameters
    ----------
    data : if exactly True, yield (node, attribute dict) pairs; for any
        other value yield only the node keys.
    """
    # Nodes whose 'type' attribute equals 'scaffold'.
    scaffold_keys = (
        key for key, node_type in self.nodes(data='type')
        if node_type == 'scaffold'
    )
    if data is not True:
        return scaffold_keys
    return ((key, self.nodes[key]) for key in scaffold_keys)
107116

108117
def get_molecule_nodes(self, data=False):
    """Return a generator over the molecule nodes of the graph.

    Parameters
    ----------
    data : if exactly True, yield (node, attribute dict) pairs; for any
        other value yield only the node keys.
    """
    # Nodes whose 'type' attribute equals 'molecule'.
    molecule_keys = (
        key for key, node_type in self.nodes(data='type')
        if node_type == 'molecule'
    )
    if data is not True:
        return molecule_keys
    return ((key, self.nodes[key]) for key in molecule_keys)
113123

114124
def get_hierarchy_sizes(self):
    """Count the scaffolds found at each hierarchy level.

    Returns a collections.Counter keyed by the 'hierarchy' attribute of
    the scaffold nodes, with the number of scaffolds at that level as value.
    """
    sizes = Counter()
    for _, attrs in self.get_scaffold_nodes(data=True):
        sizes[attrs['hierarchy']] += 1
    return sizes
117129

130+
def max_hierarchy(self):
    """Return the highest hierarchy level present among the scaffolds."""
    level_counts = self.get_hierarchy_sizes()
    # Iterating the mapping yields its keys, i.e. the hierarchy levels.
    return max(level_counts)
133+
134+
def min_hierarchy(self):
    """Return the lowest hierarchy level present among the scaffolds."""
    level_counts = self.get_hierarchy_sizes()
    # Iterating the mapping yields its keys, i.e. the hierarchy levels.
    return min(level_counts)
137+
138+
def get_scaffolds_in_hierarchy(self, hierarchy):
    """Return a generator of all scaffolds within a specified hierarchy.

    Parameters
    ----------
    hierarchy : (int) hierarchy level to select. Any value accepted by
        int() (e.g. the string '2') is allowed; it is converted once when
        iteration starts.
    """
    # Loop-invariant: convert once rather than once per scaffold node.
    level = int(hierarchy)
    for scaffold, attrs in self.get_scaffold_nodes(data=True):
        if attrs['hierarchy'] == level:
            yield scaffold
143+
144+
def scaffold_in_graph(self, scaffold_smiles):
    """Check whether a scaffold SMILES is a node of the scaffold graph.

    The query is first looked up exactly as given. If that misses, it is
    passed through canonize_smiles (with failsafe=True) and looked up again.

    Parameters
    ----------
    scaffold_smiles : (str) SMILES of query scaffold.
    """
    if scaffold_smiles in self:
        return True
    canonical = canonize_smiles(scaffold_smiles, failsafe=True)
    return canonical in self
156+
157+
def molecule_in_graph(self, molecule_id):
    """Check whether a molecule ID is a node of the scaffold graph.

    The ID is converted with str() before the lookup.

    Parameters
    ----------
    molecule_id: (str) ID of query molecule.
    """
    node_key = str(molecule_id)
    return node_key in self
165+
166+
def get_molecules_for_scaffold(self, scaffold_smiles):
    """Return a list of molecule IDs which are represented by a scaffold in the graph.

    Note: The molecules are found by a breadth-first traversal from the
    scaffold node. In the case of a scaffold tree the results represent the
    rules used to prioritize the scaffolds.

    If the SMILES is not a node as given, it is passed through
    canonize_smiles (failsafe=True) and looked up again; an empty list is
    returned when neither form is in the graph.

    Parameters
    ----------
    scaffold_smiles : (str) SMILES of query scaffold.
    """
    query = scaffold_smiles
    if query not in self:
        query = canonize_smiles(query, failsafe=True)
        if query not in self:
            return []
    reachable = nx.bfs_tree(self, query, reverse=False)
    return [node for node in reachable if self.nodes[node]['type'] == 'molecule']
185+
186+
def get_scaffolds_for_molecule(self, molecule_id):
    """Return a list of scaffold SMILES connected to a query molecule ID.

    The ID is looked up as given first. If that misses, its str() form is
    tried, so that an ID accepted by molecule_in_graph() (which converts
    with str()) is also resolved here. An empty list is returned when
    neither form is a node of the graph.

    Parameters
    ----------
    molecule_id: (str) ID of query molecule.
    """
    if molecule_id not in self:
        # Fall back to the string form, mirroring molecule_in_graph().
        molecule_id = str(molecule_id)
        if molecule_id not in self:
            return []
    # reverse=True walks the edges backwards from the molecule node.
    reachable = nx.bfs_tree(self, molecule_id, reverse=True)
    return [node for node in reachable if self.nodes[node]['type'] == 'scaffold']
200+
201+
def separate_disconnected_components(self, sort=False):
    """Split the graph into one graph object per weakly connected component.

    Each returned graph is a copy of the subgraph induced by the nodes of
    one component.

    Parameters
    ----------
    sort: if True sort components in descending order according
        to the number of nodes in the subgraph.
    """
    subgraphs = [
        self.subgraph(nodes).copy()
        for nodes in nx.weakly_connected_components(self)
    ]
    if sort:
        subgraphs.sort(key=len, reverse=True)
    return subgraphs
215+
118216
def add_molecule_node(self, molecule, **attr):
119217
name = molecule.GetProp('_Name')
120218
default_attr = dict(type='molecule', smiles=MolToSmiles(molecule))

0 commit comments

Comments
 (0)