166 lines
5.9 KiB
Python
166 lines
5.9 KiB
Python
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from mpl_toolkits.mplot3d import Axes3D
|
|
from nerf import place_atom_nerf
|
|
from ramachandran import RamachandranSampler
|
|
|
|
# Ideal geometry. Angstroms for lengths, degrees for angles.
|
|
GEO = {
|
|
'N_CA_len': 1.46,
|
|
'CA_C_len': 1.51,
|
|
'C_N_len': 1.33,
|
|
'N_H_len': 1.01,
|
|
|
|
'N_CA_C_angle': 111.0,
|
|
'CA_C_N_angle': 116.0,
|
|
'C_N_CA_angle': 122.0,
|
|
'C_N_H_angle': 119.0,
|
|
}
|
|
|
|
# Legacy function checking if two atoms (implementation as CAs) are within threshold.
|
|
def check_clashes(new_coord, existing_coords, threshold=3.0):
|
|
if len(existing_coords) == 0: return False
|
|
diff = existing_coords - new_coord
|
|
dist_sq = np.sum(diff**2, axis=1)
|
|
return np.any(dist_sq < threshold**2)
|
|
|
|
def generate_backbone_chain(sequence, nerf_func, sampler_instance,
|
|
clash_threshold=3.5,
|
|
max_retries_per_res=50,
|
|
backtrack_step=5,
|
|
verbose=False):
|
|
"""
|
|
Generates backbone with Backtracking capability.
|
|
If it gets stuck, it unwinds 'backtrack_step' residues and tries again.
|
|
"""
|
|
|
|
chain = []
|
|
occupied_ca_coords = []
|
|
|
|
# --- Residue 0 Initialization (Same as before) ---
|
|
res0_name = sequence[0]
|
|
n_0 = np.array([0.0, 0.0, 0.0])
|
|
ca_0 = np.array([GEO['N_CA_len'], 0.0, 0.0])
|
|
angle_rad = np.radians(GEO['N_CA_C_angle'])
|
|
c_0 = np.array([ca_0[0] + GEO['CA_C_len'] * np.cos(np.pi - angle_rad),
|
|
GEO['CA_C_len'] * np.sin(np.pi - angle_rad), 0.0])
|
|
|
|
chain.append({'name': res0_name, 'N': n_0, 'CA': ca_0, 'C': c_0, 'phi': None, 'psi': None})
|
|
occupied_ca_coords.append(ca_0)
|
|
|
|
# --- THE SMART LOOP ---
|
|
i = 1
|
|
total_backtracks = 0
|
|
|
|
while i < len(sequence):
|
|
prev_res = chain[i-1]
|
|
curr_res_name = sequence[i]
|
|
|
|
success = False
|
|
|
|
# Attempt to place this residue
|
|
for attempt in range(max_retries_per_res):
|
|
# 1. Sample
|
|
_, prev_psi = sampler_instance.sample(prev_res['name'])
|
|
curr_phi, curr_psi = sampler_instance.sample(curr_res_name)
|
|
|
|
# 2. NeRF Construction
|
|
n_new = nerf_func(prev_res['N'], prev_res['CA'], prev_res['C'],
|
|
GEO['C_N_len'], GEO['CA_C_N_angle'], prev_psi)
|
|
|
|
# Amide H (Skip Proline)
|
|
h_new = None
|
|
if curr_res_name != 'PRO':
|
|
h_new = nerf_func(n_new, prev_res['C'], prev_res['CA'],
|
|
GEO['N_H_len'], GEO['C_N_H_angle'], 180.0)
|
|
|
|
ca_new = nerf_func(prev_res['CA'], prev_res['C'], n_new,
|
|
GEO['N_CA_len'], GEO['C_N_CA_angle'], 180.0)
|
|
|
|
c_new = nerf_func(prev_res['C'], n_new, ca_new,
|
|
GEO['CA_C_len'], GEO['N_CA_C_angle'], curr_phi)
|
|
|
|
# 3. Clash Check
|
|
# Check against all CAs except the last 10 (local neighbors)
|
|
safe_buffer = 10
|
|
if len(occupied_ca_coords) > safe_buffer:
|
|
history = np.array(occupied_ca_coords[:-safe_buffer])
|
|
if check_clashes(ca_new, history, threshold=clash_threshold):
|
|
continue # Failed attempt
|
|
|
|
# 4. Success! Commit and Break Retry Loop
|
|
prev_res['psi'] = prev_psi # Lock in the psi that worked
|
|
|
|
new_res = {
|
|
'name': curr_res_name, 'N': n_new, 'CA': ca_new, 'C': c_new,
|
|
'phi': curr_phi, 'psi': curr_psi
|
|
}
|
|
if h_new is not None: new_res['H'] = h_new
|
|
|
|
chain.append(new_res)
|
|
occupied_ca_coords.append(ca_new)
|
|
success = True
|
|
break
|
|
|
|
if success:
|
|
i += 1
|
|
else:
|
|
# --- BACKTRACK LOGIC ---
|
|
total_backtracks += 1
|
|
target_idx = max(1, i - backtrack_step)
|
|
drop_count = i - target_idx
|
|
|
|
if verbose:
|
|
print(f" [Stuck at {i}] Backtracking {drop_count} steps to {target_idx}...")
|
|
|
|
chain = chain[:target_idx]
|
|
occupied_ca_coords = occupied_ca_coords[:target_idx]
|
|
i = target_idx
|
|
|
|
return chain, total_backtracks
|
|
|
|
# --- Visualization Helper ---
|
|
def plot_backbone(chain):
|
|
fig = plt.figure(figsize=(10, 8))
|
|
ax = fig.add_subplot(111, projection='3d')
|
|
|
|
# Extract coordinates
|
|
n_coords = np.array([res['N'] for res in chain])
|
|
ca_coords = np.array([res['CA'] for res in chain])
|
|
c_coords = np.array([res['C'] for res in chain])
|
|
|
|
# Plot trace (connect CA atoms)
|
|
ax.plot(ca_coords[:,0], ca_coords[:,1], ca_coords[:,2],
|
|
'-o', color='black', label='CA Trace', markersize=4, alpha=0.6)
|
|
|
|
# Plot atoms
|
|
ax.scatter(n_coords[:,0], n_coords[:,1], n_coords[:,2], c='blue', s=20, label='N')
|
|
ax.scatter(c_coords[:,0], c_coords[:,1], c_coords[:,2], c='red', s=20, label='C')
|
|
|
|
# Start/End markers
|
|
ax.text(ca_coords[0,0], ca_coords[0,1], ca_coords[0,2], "N-Term", color='green')
|
|
ax.text(ca_coords[-1,0], ca_coords[-1,1], ca_coords[-1,2], "C-Term", color='green')
|
|
|
|
ax.set_title(f"Unfolded Polypeptide ({len(chain)} residues)")
|
|
ax.set_xlabel("X (Angstrom)")
|
|
ax.set_ylabel("Y (Angstrom)")
|
|
ax.set_zlabel("Z (Angstrom)")
|
|
ax.legend()
|
|
plt.show()
|
|
|
|
# --- Main Execution ---
|
|
if __name__ == "__main__":
|
|
# 1. Setup
|
|
# (Assuming place_atom_nerf and RamachandranSampler are imported/defined above)
|
|
sampler = RamachandranSampler()
|
|
|
|
# 2. Define a test sequence
|
|
# A mix of General, Glycine, and Proline to test geometry
|
|
test_seq = ['MET', 'ALA', 'GLY', 'LYS', 'PRO', 'LEU', 'GLU', 'ALA', 'GLY', 'HIS'] * 30
|
|
|
|
# 3. Run
|
|
backbone = generate_backbone_chain(test_seq, place_atom_nerf, sampler)
|
|
|
|
# 4. Visualize
|
|
plot_backbone(backbone)
|