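"""
Build an unfolded polypeptide and export it as a PDB file.

When run as a script, this module generates a backbone, adds side chains
and carbonyl oxygens, and writes the result to a PDB file. A 3D matplotlib
viewer (``visualize_structure``) is also provided.
"""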
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from mpl_toolkits.mplot3d import Axes3D
|
|
|
|
from backbone import generate_backbone_chain
|
|
from nerf import place_atom_nerf
|
|
from ramachandran import RamachandranSampler
|
|
from sidechain import SIDECHAIN_TOPOLOGY, build_full_atom_chain, sample_chi_angle
|
|
|
|
def add_backbone_oxygens(chain, nerf_func, *, bond_length=1.23,
                         bond_angle_deg=121.0, torsion_angle_deg=180.0):
    """
    Add the backbone carbonyl oxygen (O) to every residue of ``chain``.

    For each residue the oxygen coordinate is whatever
    ``nerf_func(res['C'], res['CA'], res['N'], bond_length=...,
    bond_angle_deg=..., torsion_angle_deg=...)`` returns. The atom is stored
    as ``{'name': 'O', 'coord': ...}`` at the front of the residue's
    ``'sidechain'`` list (created if missing), so that a writer which emits
    N, CA, C followed by the sidechain list outputs O directly after C.

    Calling the function again refreshes the oxygen instead of stacking a
    second 'O' entry on the residue.

    Args:
        chain: Iterable of residue dicts holding 'N', 'CA' and 'C'
            coordinates. The dicts are modified in place.
        nerf_func: Atom-placement callable invoked as shown above.
        bond_length: C=O bond length passed to ``nerf_func`` (default 1.23).
        bond_angle_deg: Bond angle in degrees passed to ``nerf_func``
            (default 121.0).
        torsion_angle_deg: Torsion in degrees passed to ``nerf_func``
            (default 180.0).

    Returns:
        The same ``chain`` object that was passed in.
    """
    for res in chain:
        # Reference atoms are handed over as C, CA, N (the original author
        # labels them parent, grandparent, great-grandparent).
        # NOTE(review): the argument order nerf_func expects is not visible
        # in this file -- confirm against its definition.
        # NOTE(review): the torsion is a fixed value and does not depend on
        # the residue's sampled psi angle -- confirm that this approximation
        # is intended.
        o_coord = nerf_func(
            res['C'], res['CA'], res['N'],
            bond_length=bond_length,
            bond_angle_deg=bond_angle_deg,
            torsion_angle_deg=torsion_angle_deg,
        )

        sidechain = res.setdefault('sidechain', [])

        # Remove an 'O' left by a previous call (in place, so outside
        # references to the list stay valid) before inserting the new one.
        sidechain[:] = [atom for atom in sidechain if atom.get('name') != 'O']

        # Front of the list => written right after C by the PDB writer.
        sidechain.insert(0, {'name': 'O', 'coord': o_coord})

    return chain
|
|
|
|
def format_atom_name(name):
    """
    Lay out an atom name for the PDB atom-name field (columns 13-16).

    - A name of exactly four characters (e.g. ``HD21``) is returned unchanged
      and fills the whole field.
    - Any other name is prefixed with one space and left-justified to at
      least three characters, so short names (``CA``, ``N``, ``O``) start in
      column 14, e.g. ``" CA "``.

    Names longer than four characters are not shortened; they come back with
    the leading space and are therefore wider than the field.
    """
    if len(name) != 4:
        return " " + name.ljust(3)
    return name
|
|
|
|
def save_to_pdb(chain, filename="unfolded_structure.pdb"):
    """
    Write ``chain`` to ``filename`` as fixed-width PDB ATOM records.

    Each residue contributes its backbone atoms in the order N, CA, C,
    followed by every entry of its optional ``'sidechain'`` list (which is
    where the carbonyl O and any other atoms are stored). All atoms are put
    on chain 'A', residues are numbered from 1 in list order, and atom
    serials count up from 1. Occupancy is written as 1.00 and the
    temperature factor as 0.00.

    Args:
        chain: Sequence of residue dicts with keys 'name', 'N', 'CA', 'C'
            and optionally 'sidechain' (a list of
            ``{'name': ..., 'coord': ...}`` dicts). Every coordinate must
            unpack into three numbers.
        filename: Output path; an existing file is overwritten.

    Returns:
        None. A confirmation line is printed after the file is written.
    """
    chain_id = 'A'
    atom_serial = 1
    lines = ["REMARK 1 GENERATED BY UNFOLDED POLYPEPTIDE GENERATOR\n"]

    for res_seq, res in enumerate(chain, start=1):
        res_name = res['name']

        # Backbone first (N, CA, C), then whatever the sidechain list holds.
        atoms = [{'name': key, 'coord': res[key]} for key in ('N', 'CA', 'C')]
        atoms.extend(res.get('sidechain', ()))

        for atom in atoms:
            name = atom['name']
            x, y, z = atom['coord']

            # Element symbol: first alphabetic character of the atom name.
            # This equals name[0] for ordinary names, skips a leading digit,
            # and yields a blank instead of raising for an empty name.
            element = next((ch for ch in name if ch.isalpha()), name[:1])

            # Fixed-width ATOM record. Literal padding matters:
            #   cols  1-6   "ATOM  "        cols 23-26  residue number
            #   cols  7-11  serial          cols 27-30  blank (iCode + pad)
            #   col  12     blank           cols 31-54  x, y, z (8.3f each)
            #   cols 13-16  atom name       cols 55-60  occupancy
            #   col  17     blank (altLoc)  cols 61-66  temperature factor
            #   cols 18-20  residue name    cols 67-76  blank
            #   col  21     blank           cols 77-78  element
            #   col  22     chain id
            # NOTE(review): the width specs only pad; a serial above 99999,
            # a residue number above 9999 or an atom name wider than four
            # characters would still push later columns to the right.
            lines.append(
                f"ATOM  {atom_serial:>5} {format_atom_name(name):<4} {res_name:>3} "
                f"{chain_id:>1}{res_seq:>4}    "
                f"{x:>8.3f}{y:>8.3f}{z:>8.3f}"
                f"{1.00:>6.2f}{0.00:>6.2f}          {element:>2}\n"
            )
            atom_serial += 1

    lines.append("END\n")

    # All records are built before the file is opened, so a malformed
    # residue raises without leaving a truncated file behind.
    with open(filename, 'w') as f:
        f.writelines(lines)

    print(f"Successfully saved PDB to: {filename}")
|
|
|
|
def visualize_structure(chain, mode='backbone'):
    """
    Show the structure in an interactive 3D matplotlib window.

    A grey line through the CA atoms is always drawn. ``mode`` selects which
    atoms are added as scatter points:

    - ``'ca_trace'``: CA atoms only (blue, labelled 'CA').
    - ``'backbone'``: N (blue), CA (green) and C (red) of every residue.
    - ``'all_atom'``: the backbone atoms (semi-transparent) plus every atom
      in each residue's ``'sidechain'`` list, coloured by the first letter of
      the atom name (H light grey, O red, N blue, S yellow, anything else
      orange).

    Any other ``mode`` value draws just the CA line.

    Args:
        chain: Sequence of residue dicts with 'N', 'CA', 'C' coordinates and
            optionally a 'sidechain' list of ``{'name', 'coord'}`` dicts.
        mode: One of the mode strings above.

    Returns:
        None. The call ends in ``plt.show()``.
    """
    fig = plt.figure(figsize=(12, 10))
    ax = fig.add_subplot(111, projection='3d')

    # CA coordinates for the main trace.
    ca_x = [res['CA'][0] for res in chain]
    ca_y = [res['CA'][1] for res in chain]
    ca_z = [res['CA'][2] for res in chain]
    ax.plot(ca_x, ca_y, ca_z, color='grey', linewidth=2, label='Backbone Trace')

    # Points are collected per (colour, size, alpha) and drawn with one
    # scatter call per group rather than one call per atom.
    groups = {}

    def queue(coord, color, size, alpha=None):
        groups.setdefault((color, size, alpha), []).append(coord)

    # First letter of the atom name -> (colour, marker size).
    sidechain_style = {
        'H': ('lightgrey', 5),
        'O': ('red', 20),
        'N': ('blue', 20),
        'S': ('yellow', 30),
    }

    if mode == 'ca_trace':
        ax.scatter(ca_x, ca_y, ca_z, s=50, c='blue', label='CA')

    elif mode == 'backbone':
        for res in chain:
            queue(res['N'], 'blue', 30)
            queue(res['CA'], 'green', 30)
            queue(res['C'], 'red', 30)

    elif mode == 'all_atom':
        for res in chain:
            queue(res['N'], 'blue', 20, 0.6)
            queue(res['CA'], 'green', 20, 0.6)
            queue(res['C'], 'red', 20, 0.6)

            for atom in res.get('sidechain', ()):
                color, size = sidechain_style.get(atom['name'][:1], ('orange', 10))
                queue(atom['coord'], color, size)

    for (color, size, alpha), coords in groups.items():
        xs, ys, zs = zip(*coords)
        ax.scatter(xs, ys, zs, c=color, s=size, alpha=alpha)

    # Raw strings: '\A' is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'X ($\AA$)')
    ax.set_ylabel(r'Y ($\AA$)')
    ax.set_zlabel(r'Z ($\AA$)')
    ax.set_title(f'Generated Structure ({len(chain)} residues) - Mode: {mode}')
    plt.legend()
    plt.show()
|
|
|
|
if __name__ == "__main__":
    # Step 1 -- sampler handed to the backbone generator.
    sampler = RamachandranSampler()

    # Step 2 -- test sequence mixing several residue types (MET, LYS, ASP,
    # GLY, PHE, ARG, GLU, VAL, HIS, ALA). The trailing "* 1000" is the
    # author's switch for a much longer chain.
    sequence = [
        'MET', 'LYS', 'ASP', 'GLY', 'PHE',
        'ARG', 'GLU', 'VAL', 'HIS', 'ALA',
    ]  # * 1000

    print(f"Generating unfolded structure for: {'-'.join(sequence)}")

    # Step 3 -- backbone (N, CA, C per residue). The second return value,
    # the backtrack count, is not used afterwards.
    backbone_chain, _backtracks = generate_backbone_chain(
        sequence, place_atom_nerf, sampler, verbose=True
    )

    # Step 4 -- side chains and hydrogens (per the original author's notes
    # this also covers the charged chain termini).
    structure = build_full_atom_chain(
        backbone_chain, SIDECHAIN_TOPOLOGY, place_atom_nerf, sample_chi_angle
    )

    # Step 5 -- carbonyl oxygen on each backbone C.
    structure = add_backbone_oxygens(structure, place_atom_nerf)

    # Step 6 -- write the PDB file.
    save_to_pdb(structure, filename="unfolded_protein.pdb")

    # Step 7 -- optional viewer; 'all_atom' also shows side chains and
    # hydrogens. Uncomment to display.
    # visualize_structure(structure, mode='all_atom')
|