Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit

Permalink
opus-fold2
Browse files Browse the repository at this point in the history
  • Loading branch information
thuxugang committed May 26, 2021
1 parent b050495 commit 2719a8c
Show file tree
Hide file tree
Showing 13 changed files with 2,833 additions and 0 deletions.
549 changes: 549 additions & 0 deletions OPUS-Fold2/buildprotein/Geometry.py

Large diffs are not rendered by default.

88 changes: 88 additions & 0 deletions OPUS-Fold2/buildprotein/PeptideBuilder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import tensorflow as tf
import numpy as np
from buildprotein import Geometry

def get_norm(v):
    """Return the Euclidean norm of ``v`` taken along its last axis."""
    return tf.norm(tensor=v, ord='euclidean', axis=-1)

def get_angle(p1, p2, p3):
    """Return the angle at vertex ``p2`` between ``p1`` and ``p3``, in degrees.

    The two vectors ``p1 - p2`` and ``p3 - p2`` are passed to ``get_angle2``,
    which normalises their dot product, clips the cosine to
    [-0.999999, 0.999999] and converts the arccos result to degrees. This
    function previously carried a verbatim copy of that computation.

    Args:
        p1: Coordinates of the first outer point.
        p2: Coordinates of the vertex.
        p3: Coordinates of the second outer point.

    Returns:
        The angle in degrees, reduced over the last axis of the inputs.
    """
    return get_angle2(p1 - p2, p3 - p2)

def get_angle2(v1, v2):
    """Return the angle between the vectors ``v1`` and ``v2``, in degrees.

    The cosine is the dot product over the last axis divided by the product
    of the two norms. It is clipped to [-0.999999, 0.999999] before the
    arccos (presumably to keep acos and its gradient finite at the ends of
    the range - confirm with the author).
    """
    dot_product = tf.reduce_sum(v1 * v2, -1)
    cosine = dot_product / (get_norm(v1) * get_norm(v2))
    cosine = tf.clip_by_value(cosine, -0.999999, 0.999999)

    radians = tf.math.acos(cosine)
    return radians / np.pi * 180

def get_dihedral(p1, p2, p3, p4):
    """Return the signed dihedral angle defined by four points, in degrees.

    The magnitude is the angle (via ``get_angle2``) between the normals of
    the planes (p1, p2, p3) and (p2, p3, p4). The sign is taken from the
    projection of the cross product of those normals onto the central bond
    ``p2 - p3``.

    Args:
        p1, p2, p3, p4: Point coordinates with the xyz components on the
            last axis.

    Returns:
        The dihedral angle in degrees, reduced over the last axis.
    """
    b1 = p1 - p2
    b2 = p2 - p3
    b3 = p3 - p4

    normal_a = tf.linalg.cross(b2, b1)
    normal_b = tf.linalg.cross(b3, b2)

    handedness = tf.reduce_sum(tf.linalg.cross(normal_b, normal_a) * b2, -1)
    magnitude = get_angle2(normal_a, normal_b)

    # tf.sign(0) is 0, so multiplying by it collapsed an exactly planar
    # *trans* arrangement (normals anti-parallel, cross product zero) to 0
    # degrees instead of ~180. Treat a zero projection as positive so the
    # magnitude survives.
    positive = tf.ones_like(magnitude)
    sign = tf.where(handedness < 0, -positive, positive)

    return sign * magnitude

def calculateCoordinates(c1, c2, c3, L, ang, di):
    """Place a new atom relative to three reference atoms.

    The new position is expressed by a distance ``L`` from ``c3``, an angle
    ``ang`` and a torsion ``di`` (both in degrees; they are converted to
    radians here). A local frame is built from the unit vector along
    ``c2 -> c3``, the normalised cross product of ``c2 - c1`` with it, and
    the cross product of those two; the local offset is rotated into that
    frame and added to ``c3``.

    NOTE(review): the stack/transpose/dot sequence looks written for single
    3-vectors rather than batches - confirm before passing batched input.

    Returns:
        The new coordinates cast to ``tf.float32``.
    """
    ang_rad = ang / 180 * np.pi
    di_rad = di / 180 * np.pi
    sin_ang = tf.math.sin(ang_rad)

    # Offset of the new atom in the local frame.
    local_offset = tf.stack([L * tf.math.cos(ang_rad),
                             L * tf.math.cos(di_rad) * sin_ang,
                             L * tf.math.sin(di_rad) * sin_ang])

    unit_bc = c3 - c2
    unit_bc = unit_bc / get_norm(unit_bc)

    plane_normal = tf.linalg.cross(c2 - c1, unit_bc)
    plane_normal = plane_normal / get_norm(plane_normal)

    in_plane = tf.linalg.cross(plane_normal, unit_bc)

    # Columns of the frame matrix are (-unit_bc, in_plane, plane_normal).
    frame = tf.transpose(tf.stack([-unit_bc, in_plane, plane_normal]))

    placed = c3 + tf.experimental.numpy.dot(frame, local_offset)

    return tf.cast(placed, tf.float32)

# Alanine geometry, built once at import time; get_mainchain substitutes it
# for glycine ('G') when it computes the CB position.
geo_ala = Geometry.geometry('A')
def get_mainchain(torsions, atoms_matrix, residue, geo, geo_last):
    """Build the N, CA, C, O and CB coordinates of one residue.

    For the residue with ``resid == 1`` the N/CA/C atoms are placed
    analytically: CA at the origin, C on the x axis, N in the xy plane.
    ``torsions`` is not read in that case. Every other residue is grown from
    the N, CA and C of the previous residue, read from
    ``atoms_matrix[-5]``, ``[-4]`` and ``[-3]`` (assumes atoms_matrix holds
    five entries per residue in the order returned here - verify against the
    caller).

    Args:
        torsions: Indexable of three torsion angles in degrees. Index 1
            places N, index 2 places CA and index 0 places C.
        atoms_matrix: Coordinates of the atoms built so far.
        residue: Object exposing ``resid`` and ``resname``.
        geo: Geometry parameters for this residue.
        geo_last: Unused here; kept for the existing call signature.

    Returns:
        ``[N, CA, C, O, CB]``. CB is also returned for glycine, where it is
        computed from the module-level alanine geometry.
    """
    if residue.resid == 1:
        n_ca_c_rad = geo.N_CA_C_angle * (np.pi / 180.0)
        N = np.array([geo.CA_N_length * np.cos(n_ca_c_rad),
                      geo.CA_N_length * np.sin(n_ca_c_rad),
                      0], dtype=np.float32)
        CA = np.zeros(3, dtype=np.float32)
        C = np.array([geo.CA_C_length, 0, 0], dtype=np.float32)
    else:
        prev_N = atoms_matrix[-5]
        prev_CA = atoms_matrix[-4]
        prev_C = atoms_matrix[-3]

        N = calculateCoordinates(prev_N, prev_CA, prev_C,
                                 geo.peptide_bond, geo.CA_C_N_angle, torsions[1])
        CA = calculateCoordinates(prev_CA, prev_C, N,
                                  geo.CA_N_length, geo.C_N_CA_angle, torsions[2])
        C = calculateCoordinates(prev_C, N, CA,
                                 geo.CA_C_length, geo.N_CA_C_angle, torsions[0])

    O = calculateCoordinates(N, CA, C,
                             geo.C_O_length, geo.CA_C_O_angle, geo.N_CA_C_O_diangle)

    # Glycine borrows the alanine parameters for the CB placement.
    cb_geo = geo_ala if residue.resname == 'G' else geo
    CB = calculateCoordinates(C, N, CA,
                              cb_geo.CA_CB_length, cb_geo.C_CA_CB_angle,
                              cb_geo.N_C_CA_CB_diangle)

    return [N, CA, C, O, CB]

42 changes: 42 additions & 0 deletions OPUS-Fold2/buildprotein/RebuildStructure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 18 16:15:25 2016
@author: XuGang
"""

from buildprotein import Geometry
from buildprotein import PeptideBuilder

def getGeosData(residuesData):
    """Return one ``Geometry.geometry`` object per residue, in input order.

    Each geometry is looked up from the residue's ``resname`` attribute.
    """
    return [Geometry.geometry(residue.resname) for residue in residuesData]

def rebuild_main_chain(torsions, geosData, residuesData):
    """Rebuild the main-chain atoms of every residue from a flat torsion array.

    ``torsions`` holds three values per residue. For residue ``idx`` (other
    than the first) the triple handed to ``PeptideBuilder.get_mainchain`` is
    ``[torsions[3*idx], torsions[3*idx - 2], torsions[3*idx + 2]]``. The
    first residue is built with ``None`` for both the torsions and the
    previous geometry.

    Returns:
        A flat list with the atoms of all residues, in the order each
        ``get_mainchain`` call returns them.
    """
    assert len(residuesData) == len(geosData)

    atoms_matrix = []
    for idx, (residue, geo) in enumerate(zip(residuesData, geosData)):
        offset = 3 * idx
        if idx == 0:
            torsion, geo_last = None, None
        else:
            # phi, psi, omega
            torsion = [torsions[offset], torsions[offset - 2], torsions[offset + 2]]
            geo_last = geosData[idx - 1]

        new_atoms = PeptideBuilder.get_mainchain(torsion, atoms_matrix,
                                                 residue, geo, geo_last)
        atoms_matrix.extend(new_atoms)

    # Three torsion slots must exist for every residue.
    assert 3 * len(residuesData) == len(torsions)

    return atoms_matrix
1,298 changes: 1,298 additions & 0 deletions OPUS-Fold2/lib/ramachandran.txt

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions OPUS-Fold2/list_casp14.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
T1027-D1
T1029-D1
T1031-D1
T1033-D1
T1037-D1
T1038-D1
T1039-D1
T1040-D1
T1041-D1
T1042-D1
T1043-D1
T1049-D1
T1064-D1
T1074-D1
T1090-D1
16 changes: 16 additions & 0 deletions OPUS-Fold2/myclass/Atoms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
"""
Created on Sat May 30 07:14:18 2015
@author: XuGang
"""

from myclass import Residues

class Atom:
    """Plain record describing a single atom.

    ``resname`` is stored as returned by ``Residues.singleResname``: a
    one-character name is kept as is, a recognised three-letter name is
    mapped to its one-letter code, and anything else becomes ``None``.
    """

    def __init__(self, atomid, name1, resname, resid, position):
        one_letter = Residues.singleResname(resname)

        self.atomid, self.name1 = atomid, name1
        self.resname, self.resid = one_letter, resid
        self.position = position
78 changes: 78 additions & 0 deletions OPUS-Fold2/myclass/Myio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
"""
Created on Fri May 29 18:32:13 2015
@author: XuGang
"""

from myclass import Residues
import numpy as np

def readTASS(path):
    """Read initial backbone torsions from a whitespace-delimited file.

    Lines whose first token starts with ``#`` are skipped, and so are blank
    lines (which previously raised ``IndexError``). Every remaining line
    must have exactly 17 fields; the fields at index 3 and 4 are parsed as
    phi and psi. Omega is not read from the file and is fixed at 180.0.

    Args:
        path: Path of the file to read.

    Returns:
        A 1-D ``float32`` array laid out as
        ``[phi, psi, omega, phi, psi, omega, ...]``, three values per data
        line.

    Raises:
        AssertionError: If a data line does not have exactly 17 fields.
            Raised explicitly so the check is still active under ``-O``.
    """
    init_torsions = []
    with open(path, 'r') as handle:
        # Iterate lazily instead of materialising the file with readlines().
        for line in handle:
            fields = line.split()
            if not fields or fields[0][0] == '#':
                continue
            if len(fields) != 17:
                raise AssertionError(
                    "expected 17 fields per line, got %d: %r"
                    % (len(fields), line.rstrip("\n")))
            phi = float(fields[3])
            psi = float(fields[4])
            omega = 180.0
            init_torsions.extend([phi, psi, omega])

    return np.array(init_torsions, dtype=np.float32)

def readFasta(path):
    """Read the first record of a FASTA file.

    The first line is the header; its first character (normally ``>``) is
    dropped to form the name. The sequence is the concatenation of every
    following line up to the next header line or the end of the file, so
    sequences wrapped over several lines are returned whole. Previously only
    the line directly after the header was returned.

    Args:
        path: Path of the FASTA file.

    Returns:
        A ``(name, sequence)`` tuple of strings.

    Raises:
        IndexError: If the file has fewer than two lines. This is the same
            exception type the earlier bare indexing raised.
    """
    with open(path, 'r') as handle:
        lines = [line.strip() for line in handle]

    if len(lines) < 2:
        raise IndexError(
            "%s: expected a header line followed by a sequence" % (path,))

    sequence_parts = []
    for line in lines[1:]:
        if line.startswith('>'):
            # Start of the next record; only the first one is returned.
            break
        sequence_parts.append(line)

    return lines[0][1:], ''.join(sequence_parts)

def outputPDB(residuesData, atoms_matrix, pdb_path):
    """Write N, CA, C, O and CB of every residue as PDB ``ATOM`` records.

    ``atoms_matrix`` is read five entries per residue, in the order
    N, CA, C, O, CB. The CB entry of a glycine (``resname == "G"``) is
    consumed but not written. Atom serial numbers start at 1 and advance by
    ``residue.num_atoms`` per residue, so they can leave gaps between
    residues.

    Each line is laid out in fixed columns: the 6-character record name
    ``"ATOM  "``, serial (width 5), two blanks, atom name (left-aligned,
    width 3), residue name (right-aligned, width 4), two blanks, residue id
    (width 4), four blanks, then x, y, z with three decimals (width 8
    each). The record name used to be written with 5 characters, which
    shifted every following column one place to the left.

    Args:
        residuesData: Iterable of objects exposing ``resname``, ``resid``
            and ``num_atoms``.
        atoms_matrix: Indexable of coordinates; ``atoms_matrix[i][0..2]``
            must support ``format(value, ".3f")``.
        pdb_path: Output file path; the file is overwritten.

    Raises:
        AssertionError: If the number of entries consumed differs from
            ``len(atoms_matrix)``.
    """
    atom_names = ("N", "CA", "C", "O", "CB")

    atom_id = 1
    counter = 0
    # The context manager closes the file even if formatting raises.
    with open(pdb_path, 'w') as f:
        for residue in residuesData:
            resname = Residues.triResname(residue.resname)
            resid = str(residue.resid)
            is_glycine = residue.resname == "G"

            for idx, name1 in enumerate(atom_names):
                if is_glycine and name1 == "CB":
                    # Skip the CB slot but still consume it from atoms_matrix.
                    counter += 1
                    continue

                position = atoms_matrix[counter]
                x = format(position[0], ".3f")
                y = format(position[1], ".3f")
                z = format(position[2], ".3f")

                f.write(
                    f"ATOM  {str(atom_id + idx):>5}  {name1:<3}{resname:>4}  "
                    f"{resid:>4}    {x:>8}{y:>8}{z:>8}\n"
                )

                counter += 1

            atom_id += residue.num_atoms

    assert len(atoms_matrix) == counter

133 changes: 133 additions & 0 deletions OPUS-Fold2/myclass/Residues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 17 09:47:45 2016
@author: XuGang
"""

# Per-residue atom count keyed by one-letter code. Residue adds 4 to this
# value for glycine and 5 for every other residue to get num_atoms, so these
# counts presumably exclude the backbone atoms and CB - confirm.
num_side_chain_atoms_dict = {"G":0, "A":0, "S":1, "C":1, "V":2, "I":3, "L":3, "T":2, "R":6, "K":4,
"D":3, "N":3, "E":4, "Q":4, "M":3, "H":5, "P":2, "F":6, "Y":7, "W":9}

class Residue:
    """One residue of a chain: id, residue name and total atom count.

    ``resname_tri`` is the value returned by ``triResname(resname)``.
    ``num_atoms`` is the ``num_side_chain_atoms_dict`` entry for ``resname``
    plus 4 for glycine ('G') or plus 5 for any other residue.
    """

    def __init__(self, resid, resname):
        self.resid = resid
        self.resname = resname
        self.resname_tri = triResname(resname)

        base_atoms = 4 if resname == 'G' else 5
        self.num_atoms = num_side_chain_atoms_dict[resname] + base_atoms

# Three-letter residue names and their one-letter codes.
_TRI_TO_SINGLE = {
    'GLY': 'G', 'ALA': 'A', 'SER': 'S', 'CYS': 'C', 'VAL': 'V',
    'ILE': 'I', 'LEU': 'L', 'THR': 'T', 'ARG': 'R', 'LYS': 'K',
    'ASP': 'D', 'GLU': 'E', 'ASN': 'N', 'GLN': 'Q', 'MET': 'M',
    'HIS': 'H', 'PRO': 'P', 'PHE': 'F', 'TYR': 'Y', 'TRP': 'W',
}

# Every accepted spelling: the plain name, the same name with an 'A' prefix
# (e.g. 'AGLY'), and 'HSD' as an extra spelling of histidine.
_SINGLE_RESNAME_LOOKUP = {
    spelling: one_letter
    for tri, one_letter in _TRI_TO_SINGLE.items()
    for spelling in (tri, 'A' + tri)
}
_SINGLE_RESNAME_LOOKUP['HSD'] = 'H'


def singleResname(AA):
    """Convert a residue name to its one-letter code.

    Args:
        AA: Residue name. A one-character string is returned unchanged
            without validation.

    Returns:
        The one-letter code for a recognised name (three-letter name, its
        'A'-prefixed form, or 'HSD'), or ``None`` when the name is not
        recognised. Matching is case-sensitive.
    """
    if len(AA) == 1:
        return AA
    return _SINGLE_RESNAME_LOOKUP.get(AA)

# One-letter residue codes and their three-letter names.
_SINGLE_TO_TRI = {
    'G': 'GLY', 'A': 'ALA', 'S': 'SER', 'C': 'CYS', 'V': 'VAL',
    'I': 'ILE', 'L': 'LEU', 'T': 'THR', 'R': 'ARG', 'K': 'LYS',
    'D': 'ASP', 'E': 'GLU', 'N': 'ASN', 'Q': 'GLN', 'M': 'MET',
    'H': 'HIS', 'P': 'PRO', 'F': 'PHE', 'Y': 'TYR', 'W': 'TRP',
}


def triResname(AA):
    """Convert a residue name to its three-letter form.

    Args:
        AA: Residue name. A three-character string is returned unchanged
            without validation.

    Returns:
        The three-letter name for a recognised one-letter code, or ``None``
        when the code is not recognised. Matching is case-sensitive.
    """
    if len(AA) == 3:
        return AA
    return _SINGLE_TO_TRI.get(AA)

def getResidueDataFromSequence(fasta):
    """Return one ``Residue`` per element of ``fasta``, numbered from 1.

    Each element is passed to ``Residue`` as the residue name, and its
    1-based position in the sequence as the residue id.
    """
    return [Residue(position, resname)
            for position, resname in enumerate(fasta, start=1)]









Loading

0 comments on commit 2719a8c

Please sign in to comment.