88from collections import Counter
99
1010import tqdm
11+ import networkx as nx
12+
1113from loguru import logger
12- from networkx import DiGraph
1314from rdkit import RDLogger
1415from rdkit .Chem import rdMolHash , MolToSmiles , rdmolops
1516from rdkit .Chem .rdMolDescriptors import CalcNumRings
1617
1718from scaffoldgraph .io import *
19+ from scaffoldgraph .utils import canonize_smiles
1820from .fragment import get_murcko_scaffold , get_annotated_murcko_scaffold
1921from .scaffold import Scaffold
2022
@@ -28,7 +30,7 @@ def init_molecule_name(mol):
2830 mol .SetProp ('_Name' , n )
2931
3032
31- class ScaffoldGraph (DiGraph , ABC ):
33+ class ScaffoldGraph (nx . DiGraph , ABC ):
3234 """Abstract base class for ScaffoldGraphs"""
3335
3436 def __init__ (self , graph = None , fragmenter = None ):
@@ -92,29 +94,125 @@ def _recursive_constructor(self, child):
@property
def num_scaffold_nodes(self):
    """Return the number of scaffold nodes in the graph.

    The count is obtained by consuming the generator returned by
    ``get_scaffold_nodes``; no intermediate list is built.

    Returns
    -------
    int : number of nodes yielded by ``get_scaffold_nodes``.
    """
    return sum(1 for _ in self.get_scaffold_nodes())
96101
@property
def num_molecule_nodes(self):
    """Return the number of molecule nodes in the graph.

    The count is obtained by consuming the generator returned by
    ``get_molecule_nodes``; no intermediate list is built.

    Returns
    -------
    int : number of nodes yielded by ``get_molecule_nodes``.
    """
    return sum(1 for _ in self.get_molecule_nodes())
101109
def get_scaffold_nodes(self, data=False):
    """Return a generator over the nodes whose 'type' attribute is 'scaffold'.

    Parameters
    ----------
    data : if exactly ``True``, yield ``(node, self.nodes[node])`` tuples;
        for any other value yield only the node keys.
    """
    typed_nodes = self.nodes(data='type')
    if data is not True:
        return (name for name, kind in typed_nodes if kind == 'scaffold')
    return (
        (name, self.nodes[name])
        for name, kind in typed_nodes
        if kind == 'scaffold'
    )
107116
def get_molecule_nodes(self, data=False):
    """Return a generator over the nodes whose 'type' attribute is 'molecule'.

    Parameters
    ----------
    data : if exactly ``True``, yield ``(node, self.nodes[node])`` tuples;
        for any other value yield only the node keys.
    """
    typed_nodes = self.nodes(data='type')
    if data is not True:
        return (name for name, kind in typed_nodes if kind == 'molecule')
    return (
        (name, self.nodes[name])
        for name, kind in typed_nodes
        if kind == 'molecule'
    )
113123
def get_hierarchy_sizes(self):
    """Count the scaffold nodes found at each hierarchy level.

    Returns
    -------
    collections.Counter : maps each 'hierarchy' attribute value to the
        number of scaffold nodes carrying that value.
    """
    sizes = Counter()
    for _, attrs in self.get_scaffold_nodes(data=True):
        sizes[attrs['hierarchy']] += 1
    return sizes
117129
def max_hierarchy(self):
    """Return the largest hierarchy level present among the scaffold nodes.

    Raises
    ------
    ValueError : if ``get_hierarchy_sizes`` returns an empty mapping
        (``max`` of an empty iterable).
    """
    levels = self.get_hierarchy_sizes()
    return max(levels)
133+
def min_hierarchy(self):
    """Return the smallest hierarchy level present among the scaffold nodes.

    Raises
    ------
    ValueError : if ``get_hierarchy_sizes`` returns an empty mapping
        (``min`` of an empty iterable).
    """
    levels = self.get_hierarchy_sizes()
    return min(levels)
137+
def get_scaffolds_in_hierarchy(self, hierarchy):
    """Return a generator of all scaffolds within a specified hierarchy.

    Parameters
    ----------
    hierarchy : hierarchy level to select; converted once with ``int()``,
        so numeric strings such as ``'2'`` are accepted.

    Raises
    ------
    ValueError / TypeError : from ``int(hierarchy)`` when the level cannot
        be converted. As this is a generator, the error surfaces when the
        first item is requested.
    """
    # Convert once up front instead of on every node; this also makes an
    # invalid level fail even when the graph holds no scaffold nodes.
    level = int(hierarchy)
    for scaffold, attrs in self.get_scaffold_nodes(data=True):
        if attrs['hierarchy'] == level:
            yield scaffold
143+
def scaffold_in_graph(self, scaffold_smiles):
    """Return True if the specified scaffold SMILES is a node of the graph.

    The query is first looked up as given; only when that fails is it
    passed through ``canonize_smiles`` (with ``failsafe=True``) and looked
    up again.

    Parameters
    ----------
    scaffold_smiles : (str) SMILES of query scaffold.
    """
    if scaffold_smiles in self:
        return True
    canonical = canonize_smiles(scaffold_smiles, failsafe=True)
    return canonical in self
156+
def molecule_in_graph(self, molecule_id):
    """Return True if the specified molecule ID is a node of the graph.

    The ID is converted with ``str()`` before the membership test.

    Parameters
    ----------
    molecule_id: (str) ID of query molecule.
    """
    node_key = str(molecule_id)
    return node_key in self
165+
def get_molecules_for_scaffold(self, scaffold_smiles):
    """Return a list of molecule IDs which are represented by a scaffold in the graph.

    Note: This is determined by traversing the graph. In the case of a scaffold tree
    the results represent the rules used to prioritize the scaffolds.

    If the query is not a node as given, it is retried after
    ``canonize_smiles(..., failsafe=True)``; an empty list is returned when
    neither form is found.

    Parameters
    ----------
    scaffold_smiles : (str) SMILES of query scaffold.
    """
    query = scaffold_smiles
    if query not in self:
        query = canonize_smiles(query, failsafe=True)
        if query not in self:
            return []
    # Breadth-first walk along outgoing edges, starting at the scaffold.
    reachable = nx.bfs_tree(self, query, reverse=False)
    return [node for node in reachable if self.nodes[node]['type'] == 'molecule']
185+
def get_scaffolds_for_molecule(self, molecule_id):
    """Return a list of scaffold SMILES connected to a query molecule ID.

    The ID is converted with ``str()`` before lookup, matching
    ``molecule_in_graph``, so an ID reported as present there is also
    resolved here. An empty list is returned when the ID is not a node.

    Parameters
    ----------
    molecule_id: (str) ID of query molecule.
    """
    node_key = str(molecule_id)
    if node_key not in self:
        return []
    # Breadth-first walk against edge direction, starting at the molecule.
    reachable = nx.bfs_tree(self, node_key, reverse=True)
    return [node for node in reachable if self.nodes[node]['type'] == 'scaffold']
200+
def separate_disconnected_components(self, sort=False):
    """Separate disconnected components into distinct graph objects.

    Each weakly connected component is extracted as an independent copy
    via ``self.subgraph(nodes).copy()``.

    Parameters
    ----------
    sort: if True sort components in descending order according
        to the number of nodes in the subgraph.
    """
    components = [
        self.subgraph(nodes).copy()
        for nodes in nx.weakly_connected_components(self)
    ]
    if not sort:
        return components
    components.sort(key=len, reverse=True)
    return components
215+
118216 def add_molecule_node (self , molecule , ** attr ):
119217 name = molecule .GetProp ('_Name' )
120218 default_attr = dict (type = 'molecule' , smiles = MolToSmiles (molecule ))
0 commit comments