primacito/src/legacy/backbone.py

166 lines
5.9 KiB
Python

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from nerf import place_atom_nerf
from ramachandran import RamachandranSampler
# Ideal geometry. Angstroms for lengths, degrees for angles.
GEO = {
'N_CA_len': 1.46,
'CA_C_len': 1.51,
'C_N_len': 1.33,
'N_H_len': 1.01,
'N_CA_C_angle': 111.0,
'CA_C_N_angle': 116.0,
'C_N_CA_angle': 122.0,
'C_N_H_angle': 119.0,
}
# Legacy function checking if two atoms (implementation as CAs) are within threshold.
def check_clashes(new_coord, existing_coords, threshold=3.0):
if len(existing_coords) == 0: return False
diff = existing_coords - new_coord
dist_sq = np.sum(diff**2, axis=1)
return np.any(dist_sq < threshold**2)
def generate_backbone_chain(sequence, nerf_func, sampler_instance,
clash_threshold=3.5,
max_retries_per_res=50,
backtrack_step=5,
verbose=False):
"""
Generates backbone with Backtracking capability.
If it gets stuck, it unwinds 'backtrack_step' residues and tries again.
"""
chain = []
occupied_ca_coords = []
# --- Residue 0 Initialization (Same as before) ---
res0_name = sequence[0]
n_0 = np.array([0.0, 0.0, 0.0])
ca_0 = np.array([GEO['N_CA_len'], 0.0, 0.0])
angle_rad = np.radians(GEO['N_CA_C_angle'])
c_0 = np.array([ca_0[0] + GEO['CA_C_len'] * np.cos(np.pi - angle_rad),
GEO['CA_C_len'] * np.sin(np.pi - angle_rad), 0.0])
chain.append({'name': res0_name, 'N': n_0, 'CA': ca_0, 'C': c_0, 'phi': None, 'psi': None})
occupied_ca_coords.append(ca_0)
# --- THE SMART LOOP ---
i = 1
total_backtracks = 0
while i < len(sequence):
prev_res = chain[i-1]
curr_res_name = sequence[i]
success = False
# Attempt to place this residue
for attempt in range(max_retries_per_res):
# 1. Sample
_, prev_psi = sampler_instance.sample(prev_res['name'])
curr_phi, curr_psi = sampler_instance.sample(curr_res_name)
# 2. NeRF Construction
n_new = nerf_func(prev_res['N'], prev_res['CA'], prev_res['C'],
GEO['C_N_len'], GEO['CA_C_N_angle'], prev_psi)
# Amide H (Skip Proline)
h_new = None
if curr_res_name != 'PRO':
h_new = nerf_func(n_new, prev_res['C'], prev_res['CA'],
GEO['N_H_len'], GEO['C_N_H_angle'], 180.0)
ca_new = nerf_func(prev_res['CA'], prev_res['C'], n_new,
GEO['N_CA_len'], GEO['C_N_CA_angle'], 180.0)
c_new = nerf_func(prev_res['C'], n_new, ca_new,
GEO['CA_C_len'], GEO['N_CA_C_angle'], curr_phi)
# 3. Clash Check
# Check against all CAs except the last 10 (local neighbors)
safe_buffer = 10
if len(occupied_ca_coords) > safe_buffer:
history = np.array(occupied_ca_coords[:-safe_buffer])
if check_clashes(ca_new, history, threshold=clash_threshold):
continue # Failed attempt
# 4. Success! Commit and Break Retry Loop
prev_res['psi'] = prev_psi # Lock in the psi that worked
new_res = {
'name': curr_res_name, 'N': n_new, 'CA': ca_new, 'C': c_new,
'phi': curr_phi, 'psi': curr_psi
}
if h_new is not None: new_res['H'] = h_new
chain.append(new_res)
occupied_ca_coords.append(ca_new)
success = True
break
if success:
i += 1
else:
# --- BACKTRACK LOGIC ---
total_backtracks += 1
target_idx = max(1, i - backtrack_step)
drop_count = i - target_idx
if verbose:
print(f" [Stuck at {i}] Backtracking {drop_count} steps to {target_idx}...")
chain = chain[:target_idx]
occupied_ca_coords = occupied_ca_coords[:target_idx]
i = target_idx
return chain, total_backtracks
# --- Visualization Helper ---
def plot_backbone(chain):
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
# Extract coordinates
n_coords = np.array([res['N'] for res in chain])
ca_coords = np.array([res['CA'] for res in chain])
c_coords = np.array([res['C'] for res in chain])
# Plot trace (connect CA atoms)
ax.plot(ca_coords[:,0], ca_coords[:,1], ca_coords[:,2],
'-o', color='black', label='CA Trace', markersize=4, alpha=0.6)
# Plot atoms
ax.scatter(n_coords[:,0], n_coords[:,1], n_coords[:,2], c='blue', s=20, label='N')
ax.scatter(c_coords[:,0], c_coords[:,1], c_coords[:,2], c='red', s=20, label='C')
# Start/End markers
ax.text(ca_coords[0,0], ca_coords[0,1], ca_coords[0,2], "N-Term", color='green')
ax.text(ca_coords[-1,0], ca_coords[-1,1], ca_coords[-1,2], "C-Term", color='green')
ax.set_title(f"Unfolded Polypeptide ({len(chain)} residues)")
ax.set_xlabel("X (Angstrom)")
ax.set_ylabel("Y (Angstrom)")
ax.set_zlabel("Z (Angstrom)")
ax.legend()
plt.show()
# --- Main Execution ---
if __name__ == "__main__":
# 1. Setup
# (Assuming place_atom_nerf and RamachandranSampler are imported/defined above)
sampler = RamachandranSampler()
# 2. Define a test sequence
# A mix of General, Glycine, and Proline to test geometry
test_seq = ['MET', 'ALA', 'GLY', 'LYS', 'PRO', 'LEU', 'GLU', 'ALA', 'GLY', 'HIS'] * 30
# 3. Run
backbone = generate_backbone_chain(test_seq, place_atom_nerf, sampler)
# 4. Visualize
plot_backbone(backbone)