opus-fold2

thuxugang · May 26, 2021 · 2719a8c · 2719a8c
1 parent b050495
commit 2719a8c
Show file tree

Hide file tree

Showing 13 changed files with 2,833 additions and 0 deletions.
diff --git a/OPUS-Fold2/buildprotein/Geometry.py b/OPUS-Fold2/buildprotein/Geometry.py
diff --git a/OPUS-Fold2/buildprotein/PeptideBuilder.py b/OPUS-Fold2/buildprotein/PeptideBuilder.py
@@ -0,0 +1,88 @@
+import tensorflow as tf
+import numpy as np
+from buildprotein import Geometry
+
+def get_norm(v):
+    return tf.norm(v, axis=-1)
+
+def get_angle(p1, p2, p3):
+
+    v1 = p1 - p2
+    v2 = p3 - p2
+
+    v1_norm = get_norm(v1)
+    v2_norm = get_norm(v2)
+    c = tf.reduce_sum(v1*v2, -1)/(v1_norm * v2_norm)
+
+    c = tf.clip_by_value(c, -0.999999, 0.999999)
+
+    return tf.math.acos(c)/np.pi*180
+
+def get_angle2(v1, v2):
+
+    v1_norm = get_norm(v1)
+    v2_norm = get_norm(v2)
+    c = tf.reduce_sum(v1*v2, -1)/(v1_norm * v2_norm)
+
+    c = tf.clip_by_value(c, -0.999999, 0.999999)
+
+    return tf.math.acos(c)/np.pi*180
+
+def get_dihedral(p1, p2, p3, p4):
+
+    c1 = p1 - p2
+    c2 = p2 - p3
+    c3 = p3 - p4
+
+    v1 = tf.linalg.cross(c2, c1)
+    v2 = tf.linalg.cross(c3, c2)
+    v3 = tf.linalg.cross(v2, v1)
+
+    return tf.sign(tf.reduce_sum(v3*c2,-1))*get_angle2(v1,v2)
+
+def calculateCoordinates(c1, c2, c3, L, ang, di):
+
+    d2 = tf.stack([L*tf.math.cos(ang/180*np.pi),
+                   L*tf.math.cos(di/180*np.pi)*tf.math.sin(ang/180*np.pi),
+                   L*tf.math.sin(di/180*np.pi)*tf.math.sin(ang/180*np.pi)])
+    ab = c2 - c1
+    bc = c3 - c2
+    bc = bc/get_norm(bc)
+    n = tf.linalg.cross(ab, bc)
+    n = n/get_norm(n)
+    ab = tf.linalg.cross(n, bc)
+
+    mtr = tf.stack([-bc, ab, n])
+    mtr = tf.transpose(mtr)
+
+    bc = tf.experimental.numpy.dot(mtr, d2)
+    cc = c3 + bc
+
+    return tf.cast(cc, tf.float32)
+
+geo_ala = Geometry.geometry('A')
+def get_mainchain(torsions, atoms_matrix, residue, geo, geo_last):
+
+    resid = residue.resid
+    if resid == 1:
+        N = np.array([geo.CA_N_length*np.cos(geo.N_CA_C_angle*(np.pi/180.0)),
+                      geo.CA_N_length*np.sin(geo.N_CA_C_angle*(np.pi/180.0)),
+                      0], dtype=np.float32)
+        CA = np.array([0,0,0], dtype=np.float32)
+        C = np.array([geo.CA_C_length,0,0], dtype=np.float32)
+    else:
+        _N = atoms_matrix[-5]
+        _CA = atoms_matrix[-4]
+        _C = atoms_matrix[-3]
+
+        N = calculateCoordinates(_N, _CA, _C, geo.peptide_bond, geo.CA_C_N_angle, torsions[1])
+        CA = calculateCoordinates(_CA, _C, N, geo.CA_N_length, geo.C_N_CA_angle, torsions[2])
+        C = calculateCoordinates(_C, N, CA, geo.CA_C_length, geo.N_CA_C_angle, torsions[0])
+
+    O = calculateCoordinates(N, CA, C, geo.C_O_length, geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
+
+    if residue.resname == 'G': geo = geo_ala
+    CB = calculateCoordinates(C, N, CA, geo.CA_CB_length, geo.C_CA_CB_angle, geo.N_C_CA_CB_diangle)
+
+    return [N, CA, C, O, CB]        
+
diff --git a/OPUS-Fold2/buildprotein/RebuildStructure.py b/OPUS-Fold2/buildprotein/RebuildStructure.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Mar 18 16:15:25 2016
+
+@author: XuGang
+"""
+
+from buildprotein import Geometry
+from buildprotein import PeptideBuilder
+
+def getGeosData(residuesData):
+
+    geosData = []
+    for residue in residuesData:
+        geo = Geometry.geometry(residue.resname)
+        geosData.append(geo)
+
+    return geosData   
+
+def rebuild_main_chain(torsions, geosData, residuesData):
+
+    count = 0
+    atoms_matrix = []
+    assert len(residuesData) == len(geosData)
+
+    length = len(residuesData)
+    for idx in range(length):
+
+        if idx == 0:
+            atoms_matrix.extend(PeptideBuilder.get_mainchain(None, atoms_matrix, 
+                                                             residuesData[idx], geosData[idx], None))
+        else:
+            # phi, psi, omega
+            torsion = [torsions[count], torsions[count-2], torsions[count+2]]
+            atoms_matrix.extend(PeptideBuilder.get_mainchain(torsion, atoms_matrix, 
+                                                             residuesData[idx], geosData[idx], geosData[idx-1]))
+
+        count += 3
+
+    assert count == len(torsions)
+
+    return atoms_matrix
diff --git a/OPUS-Fold2/lib/ramachandran.txt b/OPUS-Fold2/lib/ramachandran.txt
diff --git a/OPUS-Fold2/list_casp14.txt b/OPUS-Fold2/list_casp14.txt
@@ -0,0 +1,15 @@
+T1027-D1
+T1029-D1
+T1031-D1
+T1033-D1
+T1037-D1
+T1038-D1
+T1039-D1
+T1040-D1
+T1041-D1
+T1042-D1
+T1043-D1
+T1049-D1
+T1064-D1
+T1074-D1
+T1090-D1
diff --git a/OPUS-Fold2/myclass/Atoms.py b/OPUS-Fold2/myclass/Atoms.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat May 30 07:14:18 2015
+
+@author: XuGang
+"""
+
+from myclass import Residues
+
+class Atom:
+    def __init__(self, atomid, name1, resname, resid, position):
+        self.atomid = atomid
+        self.name1 = name1
+        self.resname = Residues.singleResname(resname)
+        self.resid = resid
+        self.position = position
diff --git a/OPUS-Fold2/myclass/Myio.py b/OPUS-Fold2/myclass/Myio.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri May 29 18:32:13 2015
+
+@author: XuGang
+"""
+
+from myclass import Residues
+import numpy as np    
+
+def readTASS(path):
+
+    init_torsions = []
+    with open(path,'r') as r:
+        for i in r.readlines():
+            if i.strip().split()[0][0] == '#':
+                continue
+            else:
+                context = i.strip().split()
+                assert len(context) == 17
+                phi = float(context[3])
+                psi = float(context[4])
+                omega = 180.0
+                init_torsions.extend([phi, psi, omega])
+
+    return np.array(init_torsions, dtype=np.float32)
+
+def readFasta(path):
+
+    with open(path,'r') as r:
+        results = [i.strip() for i in r.readlines()]
+    return results[0][1:], results[1]
+
+def outputPDB(residuesData, atoms_matrix, pdb_path):
+
+    atom_id = 1
+    counter = 0
+    f = open(pdb_path, 'w')
+    for residue in residuesData:
+        for idx, name1 in enumerate(["N", "CA", "C", "O", "CB"]):
+            if residue.resname == "G" and name1 == "CB": 
+                counter += 1
+                continue
+            atom_id2 = atom_id + idx
+            string = 'ATOM  '
+            id_len = len(list(str(atom_id2)))
+            string = string + " "*(5-id_len) + str(atom_id2)
+            string = string + " "*2
+            name1_len = len(list(name1))
+            string = string + name1 + " "*(3-name1_len)
+            resname = Residues.triResname(residue.resname)
+            resname_len = len(list(resname))
+            string = string + " "*(4-resname_len) + resname
+            string = string + " "*2
+            resid = str(residue.resid)
+            resid_len = len(list(resid))
+            string = string + " "*(4-resid_len) + str(resid)
+            string = string + " "*4
+            x = format(atoms_matrix[counter][0],".3f")
+            x_len = len(list(x))
+            string = string + " "*(8-x_len) + x
+            y = format(atoms_matrix[counter][1],".3f")
+            y_len = len(list(y))
+            string = string + " "*(8-y_len) + y
+            z = format(atoms_matrix[counter][2],".3f")        
+            z_len = len(list(z))
+            string = string + " "*(8-z_len) + z  
+
+            f.write(string)
+            f.write("\n")
+
+            counter += 1
+
+        atom_id += residue.num_atoms
+
+    assert len(atoms_matrix) == counter
+    f.close()
+
diff --git a/OPUS-Fold2/myclass/Residues.py b/OPUS-Fold2/myclass/Residues.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Mar 17 09:47:45 2016
+
+@author: XuGang
+"""
+
+num_side_chain_atoms_dict = {"G":0, "A":0, "S":1, "C":1, "V":2, "I":3, "L":3, "T":2, "R":6, "K":4,
+                             "D":3, "N":3, "E":4, "Q":4, "M":3, "H":5, "P":2, "F":6, "Y":7, "W":9}
+
+class Residue:
+    def __init__(self, resid, resname):
+        self.resid = resid       
+        self.resname = resname
+        self.resname_tri = triResname(resname)
+
+        if resname == 'G':
+            self.num_atoms = num_side_chain_atoms_dict[resname] + 4
+        else:
+            self.num_atoms = num_side_chain_atoms_dict[resname] + 5
+
+def singleResname(AA):
+    if(len(AA) == 1):
+        return AA
+    else:
+        if(AA in ['GLY','AGLY']):
+            return "G"
+        elif(AA in ['ALA','AALA']):
+            return "A"
+        elif(AA in ['SER','ASER']):
+            return "S"
+        elif(AA in ['CYS','ACYS']):
+            return "C"
+        elif(AA in ['VAL','AVAL']):
+            return "V"
+        elif(AA in ['ILE','AILE']):
+            return "I"
+        elif(AA in ['LEU','ALEU']):
+            return "L"
+        elif(AA in ['THR','ATHR']):
+            return "T"
+        elif(AA in ['ARG','AARG']):
+            return "R"
+        elif(AA in ['LYS','ALYS']):
+            return "K"
+        elif(AA in ['ASP','AASP']):
+            return "D"
+        elif(AA in ['GLU','AGLU']):
+            return "E"
+        elif(AA in ['ASN','AASN']):
+            return "N"
+        elif(AA in ['GLN','AGLN']):
+            return "Q"
+        elif(AA in ['MET','AMET']):
+            return "M"
+        elif(AA in ['HIS','AHIS','HSD']):
+            return "H"
+        elif(AA in ['PRO','APRO']):
+            return "P"
+        elif(AA in ['PHE','APHE']):
+            return "F"
+        elif(AA in ['TYR','ATYR']):
+            return "Y"
+        elif(AA in ['TRP','ATRP']):
+            return "W"
+        else:
+            return None
+            # print ("Residues.singleResname() false" + AA)
+
+def triResname(AA):
+    if(len(AA) == 3):
+        return AA
+    else:
+        if(AA == "G"):
+            return "GLY"
+        elif(AA == "A"):
+            return "ALA"
+        elif(AA == "S"):
+            return "SER"
+        elif(AA == "C"):
+            return "CYS"
+        elif(AA == "V"):
+            return "VAL"
+        elif(AA == "I"):
+            return "ILE"
+        elif(AA == "L"):
+            return "LEU"
+        elif(AA == "T"):
+            return "THR"
+        elif(AA == "R"):
+            return "ARG"
+        elif(AA == "K"):
+            return "LYS"
+        elif(AA == "D"):
+            return "ASP"
+        elif(AA == "E"):
+            return "GLU"
+        elif(AA == "N"):
+            return "ASN"
+        elif(AA == "Q"):
+            return "GLN"
+        elif(AA == "M"):
+            return "MET"
+        elif(AA == "H"):
+            return "HIS"
+        elif(AA == "P"):
+            return "PRO"
+        elif(AA == "F"):
+            return "PHE"
+        elif(AA == "Y"):
+            return "TYR"
+        elif(AA == "W"):
+            return "TRP"
+        else:
+            return None
+            # print ("Residues.triResname() false" + AA)
+
+def getResidueDataFromSequence(fasta):
+
+    residuesData = []
+    for resid, resname in enumerate(fasta):
+        residuesData.append(Residue(resid + 1, resname))
+
+    return residuesData
+
+
+
+
+
+
+
+
+