Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 62 additions & 19 deletions dataloader/cath_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
import math
import numpy as np
from numpy import pi
from torch_geometric.data import (
Data,
Dataset,
Expand Down Expand Up @@ -397,35 +398,77 @@ def rec_residue_featurizer(self, rec, one_hot=True, add_feature=None):
def get_node_features(self, n_coords, c_coords, c_alpha_coords, coord_mask, with_coord_mask=True, use_angle=False,
use_omega=False):
num_res = n_coords.shape[0]
assert len(c_coords) == len(n_coords)

# Infer peptide-bond connectivity using the C_i--N_{i+1} distance.
# Biopython's PPBuilder uses a 1.8 Å C--N distance cutoff for polypeptide construction.
bond = np.linalg.norm(c_coords[:-1] - n_coords[1:], axis=1) < 1.8

if use_omega:
num_angle_type = 3
angles = np.zeros((num_res, num_angle_type))
for i in range(num_res - 1):
# These angles are called φ (phi) which involves the backbone atoms C-N-Cα-C
angles[i, 0] = dihedral(
c_coords[i], n_coords[i], c_alpha_coords[i], n_coords[i + 1])
# psi involves the backbone atoms N-Cα-C-N.
angles[i, 1] = dihedral(
n_coords[i], c_alpha_coords[i], c_coords[i], n_coords[i + 1])
angles[i, 2] = dihedral(
c_alpha_coords[i], c_coords[i], n_coords[i + 1], c_alpha_coords[i + 1])
for i in range(num_res):
# Terminal residues or chain breaks have undefined backbone torsions.
# They are kept as NaN here and later encoded as (sin, cos) = (0, 0).
if i != 0:
if bond[i-1]:
# Phi for residue i: C_{i-1}-N_i-CA_i-C_i.
angles[i, 0] = dihedral(
c_coords[i-1], n_coords[i], c_alpha_coords[i], c_coords[i])
else:
angles[i, 0] = np.nan
else:
angles[i, 0] = np.nan

if i < num_res-1:
if bond[i]:
# Psi for residue i: N_i-CA_i-C_i-N_{i+1}.
angles[i, 1] = dihedral(
n_coords[i], c_alpha_coords[i], c_coords[i], n_coords[i + 1])
# Omega for residue i: CA_i-C_i-N_{i+1}-CA_{i+1}.
angles[i, 2] = dihedral(
c_alpha_coords[i], c_coords[i], n_coords[i + 1], c_alpha_coords[i + 1])
else:
angles[i, 1] = np.nan
angles[i, 2] = np.nan
else:
angles[i, 1] = np.nan
angles[i, 2] = np.nan

else:
num_angle_type = 2
angles = np.zeros((num_res, num_angle_type))
for i in range(num_res - 1):
# These angles are called φ (phi) which involves the backbone atoms C-N-Cα-C
angles[i, 0] = dihedral(
c_coords[i], n_coords[i], c_alpha_coords[i], n_coords[i + 1])
# psi involves the backbone atoms N-Cα-C-N.
angles[i, 1] = dihedral(
n_coords[i], c_alpha_coords[i], c_coords[i], n_coords[i + 1])
for i in range(num_res):
if i != 0:
if bond[i-1]:
angles[i, 0] = dihedral(
c_coords[i-1], n_coords[i], c_alpha_coords[i], c_coords[i])
else:
angles[i, 0] = np.nan
else:
angles[i, 0] = np.nan

if i<num_res-1:
if bond[i]:
angles[i, 1] = dihedral(
n_coords[i], c_alpha_coords[i], c_coords[i], n_coords[i + 1])
else:
angles[i, 1] = np.nan
else:
angles[i, 1] = np.nan

# Raw-angle mode is kept for backward compatibility. Note that undefined
# torsions become 0 here and are not distinguishable from true 0-degree angles.
if use_angle:
node_scalar_features = angles
node_scalar_features = np.nan_to_num(angles,nan=0.0)

else:
node_scalar_features = np.zeros((num_res, num_angle_type * 2))
for i in range(num_angle_type):
node_scalar_features[:, 2 * i] = np.sin(angles[:, i])
node_scalar_features[:, 2 * i + 1] = np.cos(angles[:, i])
rad=angles[:,i] * pi / 180
# np.sin/np.cos expect radians.
node_scalar_features[:, 2 * i] = np.nan_to_num(np.sin(rad), nan=0.0)
node_scalar_features[:, 2 * i + 1] = np.nan_to_num(np.cos(rad), nan=0.0)

if with_coord_mask:
node_scalar_features = torch.cat([
Expand Down