Added files from previous iteration for reference
This commit is contained in:
parent
a73dbc94a2
commit
24a55a56e0
168
src/legacy/backbone.py
Normal file
168
src/legacy/backbone.py
Normal file
|
|
@ -0,0 +1,168 @@
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from mpl_toolkits.mplot3d import Axes3D
|
||||||
|
from nerf import place_atom_nerf
|
||||||
|
from ramachandran import RamachandranSampler
|
||||||
|
|
||||||
|
# Idealized backbone geometry: bond lengths in Angstroms, angles in degrees.
GEO = dict(
    # Backbone bond lengths
    N_CA_len=1.46,
    CA_C_len=1.51,
    C_N_len=1.33,        # peptide bond
    # Backbone amide hydrogen
    N_H_len=1.01,        # N-H bond length
    C_N_H_angle=119.0,   # C-N-H angle used when placing the amide H
    # Backbone bond angles (standard idealized values)
    N_CA_C_angle=111.0,
    CA_C_N_angle=116.0,
    C_N_CA_angle=122.0,
)
|
||||||
|
|
||||||
|
def check_clashes(new_coord, existing_coords, threshold=3.0):
    """
    Report whether ``new_coord`` is closer than ``threshold`` to any existing point.

    Parameters:
    - new_coord: (3,) array-like xyz position being tested.
    - existing_coords: (N, 3) array-like (ndarray or list of points); a single
      (3,) point is also accepted. May be empty.
    - threshold: minimum allowed distance, in the same units as the coordinates.

    Returns:
    - bool: True when at least one existing point is strictly closer than
      ``threshold``; False otherwise (including when there are no points).
    """
    coords = np.asarray(existing_coords, dtype=float)
    if coords.size == 0:
        return False

    # Promote a lone (3,) point to shape (1, 3) so the axis=1 reduction is valid.
    coords = np.atleast_2d(coords)
    diff = coords - np.asarray(new_coord, dtype=float)

    # Compare squared distances to avoid a square root per point.
    dist_sq = np.sum(diff**2, axis=1)
    return bool(np.any(dist_sq < threshold**2))
|
||||||
|
|
||||||
|
def generate_backbone_chain(sequence, nerf_func, sampler_instance,
                            clash_threshold=3.5,
                            max_retries_per_res=50,
                            backtrack_step=5,
                            verbose=False):
    """
    Grow an N/CA/C backbone residue by residue, backtracking when it gets stuck.

    Each new residue is placed with ``nerf_func`` from freshly sampled phi/psi
    angles. If no sample passes the CA clash check within
    ``max_retries_per_res`` attempts, the last ``backtrack_step`` residues are
    discarded and growth resumes from there.

    Parameters:
    - sequence: sequence of residue names (e.g. 'ALA', 'PRO').
    - nerf_func: callable ``(a, b, c, bond_length, bond_angle_deg,
      torsion_angle_deg)`` returning the coordinates of the atom bonded to ``c``.
    - sampler_instance: object whose ``sample(res_name)`` returns ``(phi, psi)``
      in degrees.
    - clash_threshold: minimum allowed CA-CA distance to non-local residues.
    - max_retries_per_res: sampling attempts per residue before backtracking.
    - backtrack_step: number of residues to unwind when stuck.
    - verbose: print a message on every backtrack.

    Returns:
    - (chain, total_backtracks): ``chain`` is a list of dicts with keys
      'name', 'N', 'CA', 'C', 'phi', 'psi' and, for every residue after the
      first that is not 'PRO', 'H'. An empty ``sequence`` yields ``([], 0)``.
    """
    if len(sequence) == 0:
        return [], 0

    # --- Residue 0: N at the origin, CA on +x, C in the xy-plane ---
    n_0 = np.array([0.0, 0.0, 0.0])
    ca_0 = np.array([GEO['N_CA_len'], 0.0, 0.0])
    angle_rad = np.radians(GEO['N_CA_C_angle'])
    c_0 = np.array([ca_0[0] + GEO['CA_C_len'] * np.cos(np.pi - angle_rad),
                    GEO['CA_C_len'] * np.sin(np.pi - angle_rad),
                    0.0])

    chain = [{'name': sequence[0], 'N': n_0, 'CA': ca_0, 'C': c_0,
              'phi': None, 'psi': None}]
    occupied_ca_coords = [ca_0]

    # The most recent CAs are local neighbours and are excluded from the clash check.
    safe_buffer = 10

    i = 1
    total_backtracks = 0

    while i < len(sequence):
        prev_res = chain[i - 1]
        curr_res_name = sequence[i]

        # The set of CAs to test against does not change between retries,
        # so build the array once per residue instead of once per attempt.
        history = None
        if len(occupied_ca_coords) > safe_buffer:
            history = np.array(occupied_ca_coords[:-safe_buffer])

        success = False

        for _attempt in range(max_retries_per_res):
            # 1. Sample: a new psi for the previous residue, phi/psi for this one
            _, prev_psi = sampler_instance.sample(prev_res['name'])
            curr_phi, curr_psi = sampler_instance.sample(curr_res_name)

            # 2. Place N (torsion = previous psi) and CA (omega fixed at 180, trans)
            n_new = nerf_func(prev_res['N'], prev_res['CA'], prev_res['C'],
                              GEO['C_N_len'], GEO['CA_C_N_angle'], prev_psi)
            ca_new = nerf_func(prev_res['CA'], prev_res['C'], n_new,
                               GEO['N_CA_len'], GEO['C_N_CA_angle'], 180.0)

            # 3. Clash check on the new CA only; skip the rest on failure
            if history is not None and check_clashes(ca_new, history,
                                                     threshold=clash_threshold):
                continue  # Failed attempt

            # 4. Place C (torsion = phi)
            c_new = nerf_func(prev_res['C'], n_new, ca_new,
                              GEO['CA_C_len'], GEO['N_CA_C_angle'], curr_phi)

            # 5. Success: lock in the psi that worked and commit the residue
            prev_res['psi'] = prev_psi

            new_res = {
                'name': curr_res_name, 'N': n_new, 'CA': ca_new, 'C': c_new,
                'phi': curr_phi, 'psi': curr_psi
            }

            # Amide H (skipped for proline). The H is bonded to the new N, so N
            # must be the third reference atom; torsion 0 puts it in the peptide
            # plane on the opposite side of N from the trans CA.
            if curr_res_name != 'PRO':
                new_res['H'] = nerf_func(prev_res['CA'], prev_res['C'], n_new,
                                         GEO['N_H_len'], GEO['C_N_H_angle'], 0.0)

            chain.append(new_res)
            occupied_ca_coords.append(ca_new)
            success = True
            break

        if success:
            i += 1
        else:
            # --- Backtrack: drop the most recent residues and retry from there ---
            total_backtracks += 1
            target_idx = max(1, i - backtrack_step)  # never drop residue 0
            drop_count = i - target_idx

            if verbose:
                print(f" [Stuck at {i}] Backtracking {drop_count} steps to {target_idx}...")

            chain = chain[:target_idx]
            occupied_ca_coords = occupied_ca_coords[:target_idx]
            i = target_idx

    return chain, total_backtracks
|
||||||
|
|
||||||
|
# --- Visualization Helper ---
|
||||||
|
def plot_backbone(chain):
    """
    Show a 3D matplotlib view of a backbone chain.

    ``chain`` is a list of residue dicts with 'N', 'CA' and 'C' coordinates.
    CA atoms are drawn as a connected trace, N and C atoms as scatter points,
    and the first/last CA are labelled as the termini. Blocks on ``plt.show()``.
    """
    figure = plt.figure(figsize=(10, 8))
    axes = figure.add_subplot(111, projection='3d')

    # One coordinate array per backbone atom type
    n_xyz = np.array([residue['N'] for residue in chain])
    ca_xyz = np.array([residue['CA'] for residue in chain])
    c_xyz = np.array([residue['C'] for residue in chain])

    # CA trace as a connected line with markers
    axes.plot(ca_xyz[:, 0], ca_xyz[:, 1], ca_xyz[:, 2],
              '-o', color='black', label='CA Trace', markersize=4, alpha=0.6)

    # N and C atoms as loose points
    axes.scatter(n_xyz[:, 0], n_xyz[:, 1], n_xyz[:, 2], c='blue', s=20, label='N')
    axes.scatter(c_xyz[:, 0], c_xyz[:, 1], c_xyz[:, 2], c='red', s=20, label='C')

    # Label both ends of the chain at their CA positions
    axes.text(ca_xyz[0, 0], ca_xyz[0, 1], ca_xyz[0, 2], "N-Term", color='green')
    axes.text(ca_xyz[-1, 0], ca_xyz[-1, 1], ca_xyz[-1, 2], "C-Term", color='green')

    axes.set_title(f"Unfolded Polypeptide ({len(chain)} residues)")
    axes.set_xlabel("X (Angstrom)")
    axes.set_ylabel("Y (Angstrom)")
    axes.set_zlabel("Z (Angstrom)")
    axes.legend()
    plt.show()
|
||||||
|
|
||||||
|
# --- Main Execution ---
|
||||||
|
if __name__ == "__main__":
    # 1. Setup
    sampler = RamachandranSampler()

    # 2. Test sequence: a mix of general residues, glycine and proline
    test_seq = ['MET', 'ALA', 'GLY', 'LYS', 'PRO', 'LEU', 'GLU', 'ALA', 'GLY', 'HIS'] * 30

    # 3. Run. generate_backbone_chain returns (chain, total_backtracks);
    #    only the chain is plotted.
    backbone, _ = generate_backbone_chain(test_seq, place_atom_nerf, sampler)

    # 4. Visualize
    plot_backbone(backbone)
|
||||||
108
src/legacy/benchmark.py
Normal file
108
src/legacy/benchmark.py
Normal file
|
|
@ -0,0 +1,108 @@
|
||||||
|
import numpy as np
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Import the core functions from your other project files
|
||||||
|
from nerf import place_atom_nerf
|
||||||
|
from ramachandran import RamachandranSampler
|
||||||
|
from backbone import generate_backbone_chain
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# BENCHMARKING FUNCTIONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# 3-letter codes of the 20 amino acids sampled by the benchmark.
AMINO_ACIDS = (
    "ALA ARG ASN ASP CYS GLN GLU GLY HIS ILE "
    "LEU LYS MET PHE PRO SER THR TRP TYR VAL"
).split()
|
||||||
|
|
||||||
|
def generate_random_sequence(length):
    """Return a list of ``length`` 3-letter codes drawn at random from AMINO_ACIDS."""
    drawn = np.random.choice(AMINO_ACIDS, size=length)
    return list(drawn)
|
||||||
|
|
||||||
|
def run_benchmark():
    """
    Time backbone generation over a grid of settings and print a report.

    For every clash threshold and chain length, several random sequences are
    generated and passed to generate_backbone_chain; the wall-clock time and
    the number of backtracks are recorded per trial. Prints the per-trial
    table, then the mean/std grouped by (threshold, length). Returns None.
    """
    # --- Benchmark grid (edit freely) ---
    lengths_to_test = [50, 100, 200, 300, 400, 1000, 10000]
    trials_per_setting = 3
    clash_thresholds_to_test = [3.5, 3.0]
    backtrack_steps = 5

    # --- Shared collaborators ---
    sampler = RamachandranSampler()
    nerf_func = place_atom_nerf
    records = []

    print("Starting Polypeptide Generator Benchmark...")
    print(f"Testing Lengths: {lengths_to_test}")
    print(f"Testing Thresholds: {clash_thresholds_to_test} Å")
    print(f"Trials per Setting: {trials_per_setting}\n")

    # --- Sweep: threshold -> length -> trial ---
    for threshold in clash_thresholds_to_test:
        for length in lengths_to_test:
            for trial in range(trials_per_setting):
                print(f"Running: Threshold={threshold}Å, Length={length}, Trial={trial+1}/{trials_per_setting}...")

                # Fresh random sequence per trial
                seq = generate_random_sequence(length)

                started = time.perf_counter()
                _chain, backtracks = generate_backbone_chain(
                    seq,
                    nerf_func,
                    sampler,
                    clash_threshold=threshold,
                    backtrack_step=backtrack_steps,
                    verbose=True,  # print backtrack messages
                )
                elapsed = time.perf_counter() - started

                records.append({
                    "Threshold (Å)": threshold,
                    "Length (Res)": length,
                    "Trial": trial,
                    "Time (s)": elapsed,
                    "Backtracks": backtracks,
                })

    # --- Analysis & report ---
    print("\n--- Benchmark Complete ---")

    if not records:
        print("No results to report.")
        return

    frame = pd.DataFrame(records)

    # Pandas display options for the printed tables
    pd.set_option('display.precision', 3)
    pd.set_option('display.width', 100)

    print("\nFull Results:")
    print(frame)

    # Mean and standard deviation per (threshold, length) setting
    grouped = frame.groupby(["Threshold (Å)", "Length (Res)"])
    summary = grouped.agg(
        avg_time=('Time (s)', 'mean'),
        std_time=('Time (s)', 'std'),
        avg_backtracks=('Backtracks', 'mean'),
        std_backtracks=('Backtracks', 'std'),
    )

    print("\n--- Summary Report (Averages) ---")
    print(summary)
|
||||||
|
|
||||||
|
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    run_benchmark()
|
||||||
205
src/legacy/export.py
Normal file
205
src/legacy/export.py
Normal file
|
|
@ -0,0 +1,205 @@
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from mpl_toolkits.mplot3d import Axes3D
|
||||||
|
|
||||||
|
from backbone import generate_backbone_chain
|
||||||
|
from nerf import place_atom_nerf
|
||||||
|
from ramachandran import RamachandranSampler
|
||||||
|
from sidechain import SIDECHAIN_TOPOLOGY, build_full_atom_chain, sample_chi_angle
|
||||||
|
|
||||||
|
def add_backbone_oxygens(chain, nerf_func):
    """
    Add the carbonyl oxygen (O) to every residue of ``chain`` in place.

    The oxygen is bonded to the backbone C (1.23 A, CA-C-O angle 121 degrees)
    and lies in the peptide plane, opposite the next residue's N. Since that N
    sits at torsion psi about the CA-C bond, O is placed at psi + 180. When a
    residue has no psi (missing key or None), 180 is used.

    Parameters:
    - chain: list of residue dicts with 'N', 'CA', 'C' coordinates and,
      optionally, 'psi' in degrees.
    - nerf_func: callable ``(a, b, c, bond_length=..., bond_angle_deg=...,
      torsion_angle_deg=...)`` returning the coordinates of the atom bonded to ``c``.

    Returns:
    - The same ``chain`` list; each residue's 'sidechain' list (created if
      absent) has ``{'name': 'O', 'coord': ...}`` inserted at index 0.
    """
    for res in chain:
        psi = res.get('psi')
        torsion = 180.0 if psi is None else psi + 180.0

        # Reference atoms run great-grandparent -> grandparent -> parent, so the
        # carbonyl C must come last for the oxygen to be bonded to it.
        o_coord = nerf_func(
            res['N'], res['CA'], res['C'],
            bond_length=1.23,
            bond_angle_deg=121.0,
            torsion_angle_deg=torsion,
        )

        # Stored at the front of the sidechain list so it is written right after C.
        res.setdefault('sidechain', []).insert(0, {'name': 'O', 'coord': o_coord})

    return chain
|
||||||
|
|
||||||
|
def format_atom_name(name):
    """
    Lay out an atom name for the 4-character PDB name field (cols 13-16).

    - A 4-character name (e.g. HD21) is returned unchanged and fills the field.
    - Any other name gets one leading space and is left-justified to at least
      3 characters, so short names (CA, N, O) start at col 14.
    """
    return name if len(name) == 4 else f" {name:<3}"
|
||||||
|
|
||||||
|
def save_to_pdb(chain, filename="unfolded_structure.pdb"):
    """
    Write ``chain`` to ``filename`` as fixed-width PDB ATOM records.

    Parameters:
    - chain: list of residue dicts with 'name', 'N', 'CA', 'C' and, optionally,
      'sidechain' (a list of ``{'name': ..., 'coord': ...}`` dicts).
    - filename: output path; an existing file is overwritten.

    Per residue the atoms are written as N, CA, C, then the 'sidechain' entries
    in list order. All atoms are on chain 'A', residues are numbered from 1,
    occupancy is 1.00 and the temperature factor is 0.00. The element is taken
    from the first character of the atom name.

    Column layout of each ATOM line:
        1-6   "ATOM  "        18-20 residue name     55-60 occupancy
        7-11  atom serial     22    chain id         61-66 temperature factor
        13-16 atom name       23-26 residue number   77-78 element
                              31-54 x, y, z (8.3f each)

    NOTE: field widths are minimums; a serial above 99999 or a residue number
    above 9999 widens the line and shifts the later columns.
    """
    chain_id = 'A'
    lines = ["REMARK   1 GENERATED BY UNFOLDED POLYPEPTIDE GENERATOR\n"]
    atom_serial = 1

    for res_seq, res in enumerate(chain, start=1):
        res_name = res['name']

        # Backbone first (N, CA, C), then whatever the sidechain list holds.
        atoms = [{'name': key, 'coord': res[key]} for key in ('N', 'CA', 'C')]
        atoms.extend(res.get('sidechain', ()))

        for atom in atoms:
            name = atom['name']
            x, y, z = atom['coord']
            element = name[0]

            # The literal spaces below are part of the fixed-width layout:
            # col 12, col 17 (altLoc), col 21, cols 27-30 and cols 67-76.
            lines.append(
                f"ATOM  {atom_serial:>5} {format_atom_name(name):<4} {res_name:>3} "
                f"{chain_id:>1}{res_seq:>4}    "
                f"{x:>8.3f}{y:>8.3f}{z:>8.3f}"
                f"{1.00:>6.2f}{0.00:>6.2f}          {element:>2}\n"
            )
            atom_serial += 1

    lines.append("END\n")

    with open(filename, 'w') as f:
        f.writelines(lines)

    print(f"Successfully saved PDB to: {filename}")
|
||||||
|
|
||||||
|
def visualize_structure(chain, mode='backbone'):
    """
    Show a 3D matplotlib view of the structure.

    Parameters:
    - chain: list of residue dicts with 'N', 'CA', 'C' coordinates and,
      optionally, 'sidechain' (list of ``{'name': ..., 'coord': ...}``).
    - mode: 'ca_trace' (CA points only), 'backbone' (N, CA, C points) or
      'all_atom' (backbone plus sidechain atoms coloured by the first letter
      of the atom name). Any other value draws only the CA trace line.

    Blocks on ``plt.show()``.
    """
    fig = plt.figure(figsize=(12, 10))
    ax = fig.add_subplot(111, projection='3d')

    # CA coordinates for the main trace
    ca_x = [res['CA'][0] for res in chain]
    ca_y = [res['CA'][1] for res in chain]
    ca_z = [res['CA'][2] for res in chain]

    ax.plot(ca_x, ca_y, ca_z, color='grey', linewidth=2, label='Backbone Trace')

    if mode == 'ca_trace':
        ax.scatter(ca_x, ca_y, ca_z, s=50, c='blue', label='CA')

    elif mode == 'backbone':
        for res in chain:
            ax.scatter(*res['N'], c='blue', s=30)
            ax.scatter(*res['CA'], c='green', s=30)
            ax.scatter(*res['C'], c='red', s=30)

    elif mode == 'all_atom':
        # (name prefix, colour, marker size); first match wins, else orange/10.
        sidechain_styles = (
            ('H', 'lightgrey', 5),
            ('O', 'red', 20),
            ('N', 'blue', 20),
            ('S', 'yellow', 30),
        )

        for res in chain:
            # Backbone
            ax.scatter(*res['N'], c='blue', s=20, alpha=0.6)
            ax.scatter(*res['CA'], c='green', s=20, alpha=0.6)
            ax.scatter(*res['C'], c='red', s=20, alpha=0.6)

            for atom in res.get('sidechain', ()):
                coord = atom['coord']
                name = atom['name']

                color, size = 'orange', 10
                for prefix, prefix_color, prefix_size in sidechain_styles:
                    if name.startswith(prefix):
                        color, size = prefix_color, prefix_size
                        break

                ax.scatter(coord[0], coord[1], coord[2], c=color, s=size)

    # Raw strings: '\A' is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'X ($\AA$)')
    ax.set_ylabel(r'Y ($\AA$)')
    ax.set_zlabel(r'Z ($\AA$)')
    ax.set_title(f'Generated Structure ({len(chain)} residues) - Mode: {mode}')
    plt.legend()
    plt.show()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # --- 1. Sampler for backbone phi/psi angles ---
    sampler = RamachandranSampler()

    # --- 2. Test peptide with mixed chemistry ---
    sequence = ['MET', 'LYS', 'ASP', 'GLY', 'PHE', 'ARG', 'GLU', 'VAL', 'HIS', 'ALA']  # * 1000

    print(f"Generating unfolded structure for: {'-'.join(sequence)}")

    # --- 3. Backbone ---
    backbone, total_backtracks = generate_backbone_chain(
        sequence, place_atom_nerf, sampler, verbose=True
    )

    # --- 4. Sidechains, built on top of the backbone ---
    full_struct = build_full_atom_chain(
        backbone, SIDECHAIN_TOPOLOGY, place_atom_nerf, sample_chi_angle
    )

    # --- 5. Carbonyl oxygens on the backbone C ---
    full_struct = add_backbone_oxygens(full_struct, place_atom_nerf)

    # --- 6. Export ---
    save_to_pdb(full_struct, filename="unfolded_protein.pdb")

    # --- 7. Optional visualization (disabled) ---
    # visualize_structure(full_struct, mode='all_atom')
|
||||||
77
src/legacy/nerf.py
Normal file
77
src/legacy/nerf.py
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def place_atom_nerf(a, b, c, bond_length, bond_angle_deg, torsion_angle_deg):
    """
    Place atom D from reference atoms A, B, C and internal coordinates.

    Parameters:
    - a, b, c: (3,) numpy arrays, xyz of the three preceding atoms.
    - bond_length: C-D distance (Angstroms).
    - bond_angle_deg: B-C-D angle (degrees).
    - torsion_angle_deg: A-B-C-D dihedral (degrees).

    Returns:
    - (3,) numpy array with the coordinates of D.
    """
    theta = np.radians(bond_angle_deg)
    tau = np.radians(torsion_angle_deg)

    # C->D in a local frame whose x-axis runs along B->C. The supplement of the
    # bond angle is used because the angle is measured against the C->B direction.
    supplement = np.pi - theta
    radial = bond_length * np.sin(supplement)
    d_local = np.array([
        bond_length * np.cos(supplement),
        radial * np.cos(tau),
        radial * np.sin(tau),
    ])

    # Local axes expressed in global coordinates:
    #   x: unit vector B->C
    #   z: unit normal of the A-B-C plane
    #   y: in-plane axis perpendicular to x
    bc = c - b
    x_axis = bc / np.linalg.norm(bc)

    normal = np.cross(b - a, x_axis)
    z_axis = normal / np.linalg.norm(normal)

    y_axis = np.cross(z_axis, x_axis)

    # Columns are the local axes, so the matrix maps local -> global.
    local_to_global = np.column_stack((x_axis, y_axis, z_axis))

    # Rotate into the global frame and translate to C.
    return c + np.dot(local_to_global, d_local)
|
||||||
|
|
||||||
|
# --- Simple Test Case ---
|
||||||
|
if __name__ == "__main__":
    # Three reference atoms: A on +y, B at the origin, C on +x
    atom_a = np.array([0.0, 1.0, 0.0])
    atom_b = np.array([0.0, 0.0, 0.0])
    atom_c = np.array([1.5, 0.0, 0.0])

    # Internal coordinates of the atom to place
    length = 1.5     # Angstroms
    b_angle = 120.0  # degrees
    torsion = 90.0   # degrees, out of the A-B-C plane

    new_atom = place_atom_nerf(atom_a, atom_b, atom_c, length, b_angle, torsion)

    for label, xyz in (("A", atom_a), ("B", atom_b), ("C", atom_c)):
        print(f"Atom {label}: {xyz}")
    print(f"Atom D (Calculated): {np.round(new_atom, 3)}")
|
||||||
83
src/legacy/ramachandran.py
Normal file
83
src/legacy/ramachandran.py
Normal file
|
|
@ -0,0 +1,83 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class RamachandranSampler:
    """
    Samples backbone (phi, psi) angles from per-residue Gaussian mixtures.

    Each distribution is a list of regions; a region is a dict with
    'phi_m'/'phi_s' and 'psi_m'/'psi_s' (mean and sigma, degrees) and 'w'
    (relative weight of the region).
    """

    def __init__(self):
        # General residues (everything except GLY and PRO)
        self.general_dist = [
            # Beta / PPII
            {'phi_m': -120, 'phi_s': 20, 'psi_m': 140, 'psi_s': 20, 'w': 0.60},
            # Right-handed alpha
            {'phi_m': -60, 'phi_s': 15, 'psi_m': -40, 'psi_s': 15, 'w': 0.30},
            # Left-handed alpha / bridge
            {'phi_m': 60, 'phi_s': 15, 'psi_m': 40, 'psi_s': 15, 'w': 0.10},
        ]

        # Glycine: one region per quadrant, wider sigmas
        self.glycine_dist = [
            # phi < 0, psi > 0 (beta / PPII)
            {'phi_m': -100, 'phi_s': 30, 'psi_m': 140, 'psi_s': 30, 'w': 0.3},
            # phi < 0, psi < 0 (right-handed alpha)
            {'phi_m': -60, 'phi_s': 30, 'psi_m': -30, 'psi_s': 30, 'w': 0.2},
            # phi > 0, psi > 0 (left-handed helix)
            {'phi_m': 60, 'phi_s': 30, 'psi_m': 30, 'psi_s': 30, 'w': 0.2},
            # phi > 0, psi < 0 (inverted beta)
            {'phi_m': 100, 'phi_s': 30, 'psi_m': -140, 'psi_s': 30, 'w': 0.3},
        ]

        # Proline: phi held tightly around -63 degrees
        self.proline_dist = [
            # Beta / PPII (dominant)
            {'phi_m': -63, 'phi_s': 5, 'psi_m': 150, 'psi_s': 20, 'w': 0.8},
            # Alpha (minor)
            {'phi_m': -63, 'phi_s': 5, 'psi_m': -35, 'psi_s': 20, 'w': 0.2},
        ]

    def _get_distribution(self, res_name):
        """Return the region list for ``res_name`` ('GLY', 'PRO', or general)."""
        if res_name == 'GLY':
            return self.glycine_dist
        if res_name == 'PRO':
            return self.proline_dist
        return self.general_dist

    def sample(self, res_name):
        """
        Return a tuple (phi, psi) in degrees for the given residue.

        A region is chosen according to its weight, phi and psi are drawn from
        that region's Gaussians, and both are wrapped into [-180, 180).
        """
        regions = self._get_distribution(res_name)

        # Region choice; weights are renormalized so they sum to exactly 1.
        raw_weights = np.array([region['w'] for region in regions])
        probabilities = raw_weights / np.sum(raw_weights)
        picked = regions[np.random.choice(len(regions), p=probabilities)]

        # Independent Gaussian draws for phi, then psi
        phi = np.random.normal(picked['phi_m'], picked['phi_s'])
        psi = np.random.normal(picked['psi_m'], picked['psi_s'])

        # Wrap both angles
        phi = (phi + 180) % 360 - 180
        psi = (psi + 180) % 360 - 180

        return phi, psi
|
||||||
|
|
||||||
|
# --- Usage Example ---
|
||||||
|
if __name__ == "__main__":
    # Demo: draw one (phi, psi) pair for a handful of residue types.
    sampler = RamachandranSampler()

    print("Sampling 5 residues...")
    for res in ('ALA', 'GLY', 'PRO', 'TRP', 'VAL'):
        phi, psi = sampler.sample(res)
        print(f"{res}: Phi={phi:.1f}, Psi={psi:.1f}")
|
||||||
539
src/legacy/sidechain.py
Normal file
539
src/legacy/sidechain.py
Normal file
|
|
@ -0,0 +1,539 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from backbone import generate_backbone_chain
|
||||||
|
from nerf import place_atom_nerf
|
||||||
|
from ramachandran import RamachandranSampler
|
||||||
|
|
||||||
|
# ==========================================
|
||||||
|
# 1. TOPOLOGY DICTIONARY (With Hydrogens)
|
||||||
|
# ==========================================
|
||||||
|
# Note: Bond lengths/angles are from Engh & Huber / Amber99 parameters.
|
||||||
|
# 'p' = parent, 'gp' = grandparent, 'ggp' = great-grandparent
|
||||||
|
|
||||||
|
# Per-residue build order for sidechain atoms (hydrogens included).
#
# Each entry describes one atom to be placed by internal coordinates:
#   'atom'  - name of the atom being placed
#   'p'     - parent: the atom the new atom is bonded to
#   'gp'    - grandparent, 'ggp' - great-grandparent (torsion reference)
#   'b'     - bond length p-atom
#   'theta' - bond angle gp-p-atom, in degrees
#   'chi'   - torsion ggp-gp-p-atom: either a fixed float in degrees, or a
#             symbolic name ('chi1', 'chi2', 'chi_hydroxyl', ...) that is
#             sampled once per residue; a '_plus_120' / '_plus_240' /
#             '_plus_180' suffix places the atom at that offset from the
#             sampled value so that branches stay mutually consistent.
# Entries are consumed in list order, so an atom must appear after every
# atom it references. 'C_prev' is the carbonyl carbon of the previous residue.
#
# Planar rings: two substituents placed from the same (ggp, gp, p) frame must
# use different torsions (0 = cis to ggp, 180 = trans to ggp), otherwise they
# end up on top of each other.
SIDECHAIN_TOPOLOGY = {
    'GLY': [
        # Glycine has two alpha hydrogens
        {'atom': 'HA2', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': 120.0},
        {'atom': 'HA3', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
    ],

    'ALA': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        # Methyl hydrogens (staggered)
        {'atom': 'HB1', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},
    ],

    'VAL': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},  # 3rd pos

        {'atom': 'CG1', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG11', 'p': 'CG1', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HG12', 'p': 'CG1', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HG13', 'p': 'CG1', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},

        {'atom': 'CG2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HG21', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HG22', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HG23', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},
    ],

    'LEU': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},

        {'atom': 'CD1', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.52, 'theta': 109.5, 'chi': 'chi2'},
        {'atom': 'HD11', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HD12', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HD13', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},

        {'atom': 'CD2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.52, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HD21', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HD22', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HD23', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},
    ],

    'ILE': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG1', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG12', 'p': 'CG1', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HG13', 'p': 'CG1', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},

        {'atom': 'CG2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HG21', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HG22', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HG23', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},

        {'atom': 'CD1', 'p': 'CG1', 'gp': 'CB', 'ggp': 'CA', 'b': 1.52, 'theta': 109.5, 'chi': 'chi2'},
        {'atom': 'HD11', 'p': 'CD1', 'gp': 'CG1', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HD12', 'p': 'CD1', 'gp': 'CG1', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HD13', 'p': 'CD1', 'gp': 'CG1', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},
    ],

    'MET': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HG3', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},

        {'atom': 'SD', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.80, 'theta': 109.5, 'chi': 'chi2'},
        {'atom': 'CE', 'p': 'SD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.80, 'theta': 100.0, 'chi': 'chi3'},
        {'atom': 'HE1', 'p': 'CE', 'gp': 'SD', 'ggp': 'CG', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HE2', 'p': 'CE', 'gp': 'SD', 'ggp': 'CG', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HE3', 'p': 'CE', 'gp': 'SD', 'ggp': 'CG', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},
    ],

    'PHE': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.50, 'theta': 113.0, 'chi': 'chi1'},
        {'atom': 'CD1', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.40, 'theta': 120.0, 'chi': 'chi2'},
        {'atom': 'HD1', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.08, 'theta': 120.0, 'chi': 0.0},  # Planar

        {'atom': 'CD2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.40, 'theta': 120.0, 'chi': 'chi2_plus_180'},
        {'atom': 'HD2', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.08, 'theta': 120.0, 'chi': 0.0},

        {'atom': 'CE1', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.40, 'theta': 120.0, 'chi': 180.0},
        {'atom': 'HE1', 'p': 'CE1', 'gp': 'CD1', 'ggp': 'CG', 'b': 1.08, 'theta': 120.0, 'chi': 180.0},

        {'atom': 'CE2', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.40, 'theta': 120.0, 'chi': 180.0},
        {'atom': 'HE2', 'p': 'CE2', 'gp': 'CD2', 'ggp': 'CG', 'b': 1.08, 'theta': 120.0, 'chi': 180.0},

        {'atom': 'CZ', 'p': 'CE1', 'gp': 'CD1', 'ggp': 'CG', 'b': 1.40, 'theta': 120.0, 'chi': 0.0},
        {'atom': 'HZ', 'p': 'CZ', 'gp': 'CE1', 'ggp': 'CD1', 'b': 1.08, 'theta': 120.0, 'chi': 180.0},
    ],

    'TYR': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.50, 'theta': 113.0, 'chi': 'chi1'},
        {'atom': 'CD1', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.40, 'theta': 120.0, 'chi': 'chi2'},
        {'atom': 'HD1', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.08, 'theta': 120.0, 'chi': 0.0},

        {'atom': 'CD2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.40, 'theta': 120.0, 'chi': 'chi2_plus_180'},
        {'atom': 'HD2', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.08, 'theta': 120.0, 'chi': 0.0},

        {'atom': 'CE1', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.40, 'theta': 120.0, 'chi': 180.0},
        {'atom': 'HE1', 'p': 'CE1', 'gp': 'CD1', 'ggp': 'CG', 'b': 1.08, 'theta': 120.0, 'chi': 180.0},

        {'atom': 'CE2', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.40, 'theta': 120.0, 'chi': 180.0},
        {'atom': 'HE2', 'p': 'CE2', 'gp': 'CD2', 'ggp': 'CG', 'b': 1.08, 'theta': 120.0, 'chi': 180.0},

        {'atom': 'CZ', 'p': 'CE1', 'gp': 'CD1', 'ggp': 'CG', 'b': 1.40, 'theta': 120.0, 'chi': 0.0},
        {'atom': 'OH', 'p': 'CZ', 'gp': 'CE1', 'ggp': 'CD1', 'b': 1.36, 'theta': 120.0, 'chi': 180.0},
        # Polar hydroxyl H - torsion is sampled
        {'atom': 'HH', 'p': 'OH', 'gp': 'CZ', 'ggp': 'CE1', 'b': 0.96, 'theta': 109.5, 'chi': 'chi_hydroxyl'},
    ],

    'TRP': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.50, 'theta': 113.0, 'chi': 'chi1'},
        {'atom': 'CD1', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.37, 'theta': 127.0, 'chi': 'chi2'},
        # HD1 points away from the ring (cis to CB); NE1 continues the ring
        # (trans to CB). Both used 180 before, which stacked H onto NE1.
        {'atom': 'HD1', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.08, 'theta': 126.0, 'chi': 0.0},

        {'atom': 'CD2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.43, 'theta': 127.0, 'chi': 'chi2_plus_180'},
        {'atom': 'NE1', 'p': 'CD1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.38, 'theta': 110.0, 'chi': 180.0},
        {'atom': 'HE1', 'p': 'NE1', 'gp': 'CD1', 'ggp': 'CG', 'b': 1.01, 'theta': 125.0, 'chi': 180.0},

        # CE2 is the five-ring junction atom (trans to CB, mirroring NE1);
        # CE3 starts the six-ring on the CB side (cis to CB).
        {'atom': 'CE2', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.40, 'theta': 108.0, 'chi': 180.0},
        {'atom': 'CE3', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.40, 'theta': 130.0, 'chi': 0.0},
        {'atom': 'HE3', 'p': 'CE3', 'gp': 'CD2', 'ggp': 'CG', 'b': 1.08, 'theta': 120.0, 'chi': 0.0},

        {'atom': 'CZ2', 'p': 'CE2', 'gp': 'CD2', 'ggp': 'CG', 'b': 1.40, 'theta': 120.0, 'chi': 180.0},
        {'atom': 'HZ2', 'p': 'CZ2', 'gp': 'CE2', 'ggp': 'CD2', 'b': 1.08, 'theta': 120.0, 'chi': 180.0},

        # CZ3 continues the six-ring (trans to CG); HE3 above takes the cis slot.
        {'atom': 'CZ3', 'p': 'CE3', 'gp': 'CD2', 'ggp': 'CG', 'b': 1.40, 'theta': 120.0, 'chi': 180.0},
        {'atom': 'HZ3', 'p': 'CZ3', 'gp': 'CE3', 'ggp': 'CD2', 'b': 1.08, 'theta': 120.0, 'chi': 180.0},

        {'atom': 'CH2', 'p': 'CZ2', 'gp': 'CE2', 'ggp': 'CD2', 'b': 1.40, 'theta': 120.0, 'chi': 0.0},
        {'atom': 'HH2', 'p': 'CH2', 'gp': 'CZ2', 'ggp': 'CE2', 'b': 1.08, 'theta': 120.0, 'chi': 180.0},
    ],

    'SER': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'OG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.42, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG', 'p': 'OG', 'gp': 'CB', 'ggp': 'CA', 'b': 0.96, 'theta': 109.5, 'chi': 'chi_hydroxyl'},
    ],

    'THR': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'OG1', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.42, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG1', 'p': 'OG1', 'gp': 'CB', 'ggp': 'CA', 'b': 0.96, 'theta': 109.5, 'chi': 'chi_hydroxyl'},

        {'atom': 'CG2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HG21', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HG22', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HG23', 'p': 'CG2', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': -60.0},
    ],

    'CYS': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'SG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.81, 'theta': 109.5, 'chi': 'chi1'},
        # Thiol H reuses the 'chi_hydroxyl' sampler; the C-S-H angle is an approximation (SH is near 90 deg)
        {'atom': 'HG', 'p': 'SG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.34, 'theta': 96.0, 'chi': 'chi_hydroxyl'},
    ],

    'ASP': [  # COO- (Deprotonated)
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 113.0, 'chi': 'chi1'},
        {'atom': 'OD1', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.25, 'theta': 120.0, 'chi': 'chi2'},
        {'atom': 'OD2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.25, 'theta': 120.0, 'chi': 'chi2_plus_180'},
    ],

    'ASN': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 113.0, 'chi': 'chi1'},
        {'atom': 'OD1', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.24, 'theta': 120.0, 'chi': 'chi2'},
        {'atom': 'ND2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.33, 'theta': 120.0, 'chi': 'chi2_plus_180'},
        {'atom': 'HD21', 'p': 'ND2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.01, 'theta': 120.0, 'chi': 0.0},
        {'atom': 'HD22', 'p': 'ND2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.01, 'theta': 120.0, 'chi': 180.0},
    ],

    'GLU': [  # COO- (Deprotonated)
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HG3', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},

        {'atom': 'CD', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.52, 'theta': 113.0, 'chi': 'chi2'},
        {'atom': 'OE1', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.25, 'theta': 120.0, 'chi': 'chi3'},
        {'atom': 'OE2', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.25, 'theta': 120.0, 'chi': 'chi3_plus_180'},
    ],

    'GLN': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HG3', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},

        {'atom': 'CD', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.52, 'theta': 113.0, 'chi': 'chi2'},
        {'atom': 'OE1', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.24, 'theta': 120.0, 'chi': 'chi3'},
        {'atom': 'NE2', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.33, 'theta': 120.0, 'chi': 'chi3_plus_180'},
        {'atom': 'HE21', 'p': 'NE2', 'gp': 'CD', 'ggp': 'CG', 'b': 1.01, 'theta': 120.0, 'chi': 0.0},
        {'atom': 'HE22', 'p': 'NE2', 'gp': 'CD', 'ggp': 'CG', 'b': 1.01, 'theta': 120.0, 'chi': 180.0},
    ],

    'LYS': [  # NH3+ (Protonated)
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HG3', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},

        {'atom': 'CD', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.52, 'theta': 109.5, 'chi': 'chi2'},
        {'atom': 'HD2', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 'chi3_plus_120'},
        {'atom': 'HD3', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 'chi3_plus_240'},

        {'atom': 'CE', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.52, 'theta': 109.5, 'chi': 'chi3'},
        {'atom': 'HE2', 'p': 'CE', 'gp': 'CD', 'ggp': 'CG', 'b': 1.09, 'theta': 109.5, 'chi': 'chi4_plus_120'},
        {'atom': 'HE3', 'p': 'CE', 'gp': 'CD', 'ggp': 'CG', 'b': 1.09, 'theta': 109.5, 'chi': 'chi4_plus_240'},

        {'atom': 'NZ', 'p': 'CE', 'gp': 'CD', 'ggp': 'CG', 'b': 1.49, 'theta': 109.5, 'chi': 'chi4'},
        # Protonated amine hydrogens
        {'atom': 'HZ1', 'p': 'NZ', 'gp': 'CE', 'ggp': 'CD', 'b': 1.01, 'theta': 109.5, 'chi': 60.0},
        {'atom': 'HZ2', 'p': 'NZ', 'gp': 'CE', 'ggp': 'CD', 'b': 1.01, 'theta': 109.5, 'chi': 180.0},
        {'atom': 'HZ3', 'p': 'NZ', 'gp': 'CE', 'ggp': 'CD', 'b': 1.01, 'theta': 109.5, 'chi': -60.0},
    ],

    'ARG': [  # Charged Guanidinium
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.52, 'theta': 109.5, 'chi': 'chi1'},
        {'atom': 'HG2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HG3', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},

        {'atom': 'CD', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.52, 'theta': 109.5, 'chi': 'chi2'},
        {'atom': 'HD2', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 'chi3_plus_120'},
        {'atom': 'HD3', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 'chi3_plus_240'},

        {'atom': 'NE', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.46, 'theta': 110.0, 'chi': 'chi3'},
        # HE sits opposite CZ on the planar NE, so it must follow chi4; a fixed
        # 180 landed on top of CZ whenever chi4 was sampled as 180.
        {'atom': 'HE', 'p': 'NE', 'gp': 'CD', 'ggp': 'CG', 'b': 1.01, 'theta': 118.0, 'chi': 'chi4_plus_180'},  # Approx

        {'atom': 'CZ', 'p': 'NE', 'gp': 'CD', 'ggp': 'CG', 'b': 1.33, 'theta': 120.0, 'chi': 'chi4'},
        {'atom': 'NH1', 'p': 'CZ', 'gp': 'NE', 'ggp': 'CD', 'b': 1.33, 'theta': 120.0, 'chi': 'chi5'},
        {'atom': 'HH11', 'p': 'NH1', 'gp': 'CZ', 'ggp': 'NE', 'b': 1.01, 'theta': 120.0, 'chi': 0.0},
        {'atom': 'HH12', 'p': 'NH1', 'gp': 'CZ', 'ggp': 'NE', 'b': 1.01, 'theta': 120.0, 'chi': 180.0},

        {'atom': 'NH2', 'p': 'CZ', 'gp': 'NE', 'ggp': 'CD', 'b': 1.33, 'theta': 120.0, 'chi': 'chi5_plus_180'},
        {'atom': 'HH21', 'p': 'NH2', 'gp': 'CZ', 'ggp': 'NE', 'b': 1.01, 'theta': 120.0, 'chi': 0.0},
        {'atom': 'HH22', 'p': 'NH2', 'gp': 'CZ', 'ggp': 'NE', 'b': 1.01, 'theta': 120.0, 'chi': 180.0},
    ],

    'HIS': [  # Neutral Epsilon Tautomer (H on NE2)
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 109.5, 'chi': 122.5},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.50, 'theta': 114.0, 'chi': 'chi1'},
        {'atom': 'ND1', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.38, 'theta': 122.0, 'chi': 'chi2'},
        {'atom': 'CD2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.36, 'theta': 122.0, 'chi': 'chi2_plus_180'},
        # HD2 points out of the ring (cis to CB); NE2 below closes the ring
        # (trans to CB, mirroring CE1 on the ND1 side).
        {'atom': 'HD2', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.08, 'theta': 125.0, 'chi': 0.0},

        {'atom': 'CE1', 'p': 'ND1', 'gp': 'CG', 'ggp': 'CB', 'b': 1.33, 'theta': 108.0, 'chi': 180.0},
        {'atom': 'HE1', 'p': 'CE1', 'gp': 'ND1', 'ggp': 'CG', 'b': 1.08, 'theta': 125.0, 'chi': 180.0},

        {'atom': 'NE2', 'p': 'CD2', 'gp': 'CG', 'ggp': 'CB', 'b': 1.37, 'theta': 108.0, 'chi': 180.0},
        {'atom': 'HE2', 'p': 'NE2', 'gp': 'CD2', 'ggp': 'CG', 'b': 1.01, 'theta': 125.0, 'chi': 180.0},
    ],

    'PRO': [
        {'atom': 'HA', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.09, 'theta': 109.5, 'chi': -120.0},
        {'atom': 'CB', 'p': 'CA', 'gp': 'N', 'ggp': 'C_prev', 'b': 1.52, 'theta': 103.0, 'chi': 115.0},
        {'atom': 'HB2', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_120'},
        {'atom': 'HB3', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.09, 'theta': 109.5, 'chi': 'chi1_plus_240'},

        {'atom': 'CG', 'p': 'CB', 'gp': 'CA', 'ggp': 'N', 'b': 1.50, 'theta': 104.0, 'chi': 'chi1'},
        {'atom': 'HG2', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HG3', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},

        {'atom': 'CD', 'p': 'CG', 'gp': 'CB', 'ggp': 'CA', 'b': 1.52, 'theta': 105.0, 'chi': 'chi2'},
        # NOTE(review): HD2/HD3 reuse the chi2 offsets and nothing in this
        # template ties CD back to N, so ring closure is not enforced - confirm
        # this is acceptable for the intended use.
        {'atom': 'HD2', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_120'},
        {'atom': 'HD3', 'p': 'CD', 'gp': 'CG', 'ggp': 'CB', 'b': 1.09, 'theta': 109.5, 'chi': 'chi2_plus_240'},
    ],
}
|
||||||
|
|
||||||
|
# ==========================================
|
||||||
|
# 2. UPDATED PYTHON FUNCTIONS
|
||||||
|
# ==========================================
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def sample_chi_angle(chi_name):
    """
    Sample a sidechain torsion angle (chi), in degrees.

    Every torsion - including the polar 'chi_hydroxyl' rotors of Ser, Thr
    and Tyr - is currently drawn uniformly from the three ideal staggered
    wells (60, 180, -60).

    Args:
        chi_name: Symbolic name of the torsion being sampled (e.g. 'chi1',
            'chi_hydroxyl'). It is accepted so callers can pass the name
            through, but it does not influence the draw at present.
            Branch suffixes such as '_plus_120' are expected to be resolved
            by the caller, not here.

    Returns:
        One of 60.0, 180.0 or -60.0, as returned by ``np.random.choice``.
    """
    # The previous version had a no-op "plus_" check and two byte-identical
    # return branches; a single draw expresses the same behaviour.
    staggered_wells = [60.0, 180.0, -60.0]
    return np.random.choice(staggered_wells)
|
||||||
|
|
||||||
|
|
||||||
|
# Suffixes recognised on symbolic chi names and the offset (degrees) each
# adds to the sampled base torsion. Checked in this order.
_CHI_OFFSETS = (
    ('plus_120', 120.0),
    ('plus_240', 240.0),
    ('minus_120', 240.0),  # +240 is the same rotation as -120
    ('plus_180', 180.0),
)


def _resolve_torsion(chi_param, chi_sampler, sampled_chi_values):
    """Turn a topology 'chi' entry into a torsion in degrees.

    Numeric entries are used as-is. String entries name a sampled torsion:
    the base name is drawn once via ``chi_sampler`` and cached in
    ``sampled_chi_values`` so that every branch atom of the residue is
    offset from the same value. Any other type falls back to 0.0.
    """
    if isinstance(chi_param, (int, float)):
        # Accept ints too: an integer literal used to fall through to 0.0.
        return float(chi_param)
    if not isinstance(chi_param, str):
        return 0.0

    # Strip either offset style so 'chi1_minus_120' shares chi1's sample.
    base_chi_name = chi_param.split('_plus')[0].split('_minus')[0]
    if base_chi_name not in sampled_chi_values:
        sampled_chi_values[base_chi_name] = chi_sampler(base_chi_name)
    base_torsion = sampled_chi_values[base_chi_name]

    for suffix, offset in _CHI_OFFSETS:
        if suffix in chi_param:
            return base_torsion + offset
    return base_torsion


def _amide_h_position(c_prev, n, ca, bond_length=1.01):
    """Place the backbone amide H on N, opposite the bisector of C_prev-N-CA.

    The H lies in the plane of the three atoms and makes equal angles with
    N-C_prev and N-CA (about 119 degrees for a 122 degree C-N-CA angle).
    Raises ValueError when the geometry is degenerate (coincident atoms or
    a straight C_prev-N-CA line), because the direction is then undefined.
    """
    n = np.asarray(n, dtype=float)
    to_c_prev = np.asarray(c_prev, dtype=float) - n
    to_ca = np.asarray(ca, dtype=float) - n
    len_c_prev = np.linalg.norm(to_c_prev)
    len_ca = np.linalg.norm(to_ca)
    if len_c_prev < 1e-8 or len_ca < 1e-8:
        raise ValueError("Cannot place amide H: N coincides with C_prev or CA")
    bisector = to_c_prev / len_c_prev + to_ca / len_ca
    bisector_len = np.linalg.norm(bisector)
    if bisector_len < 1e-8:
        raise ValueError("Cannot place amide H: C_prev, N and CA are collinear")
    return n - bond_length * bisector / bisector_len


def build_full_atom_chain(backbone_chain, topology_dict, nerf_func, chi_sampler):
    """
    Attach sidechain atoms, backbone amide H and terminal atoms to a backbone.

    Args:
        backbone_chain: List of residue dicts. Each must provide 'name' and
            the 'N', 'CA' and 'C' coordinates. The list and its dicts are
            modified IN PLACE: placed atoms are appended to each residue's
            'sidechain' list (created if missing) as
            ``{'name': ..., 'coord': ...}``.
        topology_dict: Maps residue name to an ordered list of atom
            definitions with keys 'atom', 'p', 'gp', 'ggp', 'b', 'theta'
            and 'chi'. Residues without an entry get no sidechain atoms.
        nerf_func: Callable ``(a, b, c, bond_length=, bond_angle_deg=,
            torsion_angle_deg=)`` returning the coordinate of a new atom
            bonded to ``c``.
        chi_sampler: Callable taking a base chi name and returning a torsion
            in degrees; called once per distinct base name per residue.

    Returns:
        The same list object that was passed in as ``backbone_chain``.

    Raises:
        KeyError: If a topology entry references an atom not yet placed.
        ValueError: If the amide H cannot be placed (degenerate backbone).
    """
    full_chain = backbone_chain  # deliberately not copied: callers get the same list back
    last_index = len(full_chain) - 1

    for i, res in enumerate(full_chain):
        res_name = res['name']
        placed_atoms = res.setdefault('sidechain', [])
        prev_res = full_chain[i - 1] if i > 0 else None

        # --- 1. Backbone amide H ---
        # Not on proline (no amide H) and not on the N-terminus (handled as
        # NH3+ below). The H is bonded to THIS residue's N; the earlier code
        # handed nerf_func the previous CA as the parent atom, which put the
        # H 1.01 A from CA_prev instead of N.
        if prev_res is not None and res_name != 'PRO':
            h_coord = _amide_h_position(prev_res['C'], res['N'], res['CA'], bond_length=1.01)
            placed_atoms.append({'name': 'H', 'coord': h_coord})

        # --- 2. Local coordinate table for reference lookups ---
        local_atom_coords = {
            'N': res['N'],
            'CA': res['CA'],
            'C': res['C'],
        }
        if prev_res is None:
            # Ghost carbonyl carbon so residue 0 has a torsion reference.
            # NOTE(review): absolute coordinates - presumably relies on
            # residue 0 being built with N at the origin and CA on +x.
            local_atom_coords['C_prev'] = np.array([-0.7, 1.2, 0.0])
        else:
            local_atom_coords['C_prev'] = prev_res['C']

        # One sample per base chi name keeps branch atoms consistent.
        sampled_chi_values = {}

        # --- 3. Sidechain atoms (includes HA, HB, ...) ---
        for atom_def in topology_dict.get(res_name, []):
            atom_name = atom_def['atom']

            if atom_name in ('CB', 'HA', 'HA2', 'HA3'):
                # CA substituents always take C_prev as torsion reference,
                # whatever the template's 'ggp' says.
                ggp_coord = local_atom_coords['C_prev']
            else:
                ggp_coord = local_atom_coords[atom_def['ggp']]
            gp_coord = local_atom_coords[atom_def['gp']]
            p_coord = local_atom_coords[atom_def['p']]

            torsion = _resolve_torsion(atom_def['chi'], chi_sampler, sampled_chi_values)

            new_atom_coord = nerf_func(
                ggp_coord, gp_coord, p_coord,
                bond_length=atom_def['b'],
                bond_angle_deg=atom_def['theta'],
                torsion_angle_deg=torsion
            )

            # Later atoms of this residue may reference the one just placed.
            local_atom_coords[atom_name] = new_atom_coord
            placed_atoms.append({'name': atom_name, 'coord': new_atom_coord})

        # --- 4. Termini ---
        if i == 0:
            # N-terminus: NH3+ with three staggered hydrogens on N
            # (references run C -> CA -> N so the new atom is bonded to N).
            for k, offset in enumerate([60, 180, 300]):
                h_term = nerf_func(
                    res['C'], res['CA'], res['N'],
                    bond_length=1.01, bond_angle_deg=109.5, torsion_angle_deg=offset
                )
                placed_atoms.append({'name': f'H{k+1}', 'coord': h_term})

        if i == last_index:
            # C-terminus: terminal oxygen OXT bonded to C.
            oxt = nerf_func(
                res['N'], res['CA'], res['C'],
                bond_length=1.25, bond_angle_deg=117.0, torsion_angle_deg=180.0
            )
            placed_atoms.append({'name': 'OXT', 'coord': oxt})

    return full_chain
|
||||||
|
|
||||||
|
# --- Main Execution (Partial) ---
|
||||||
|
if __name__ == "__main__":
    # Demo run: sample a backbone for a short test peptide, decorate it with
    # sidechains and hydrogens, then dump a few residues for eyeballing.

    # 1. Setup
    sampler = RamachandranSampler()

    # 2. Test sequence (indices 2, 5, 6 and 7 are inspected below)
    test_seq = ['MET', 'ALA', 'GLY', 'LYS', 'PRO', 'LEU', 'VAL', 'PHE']

    # 3. Backbone
    print("Generating backbone...")
    backbone = generate_backbone_chain(test_seq, place_atom_nerf, sampler)

    # 4. Sidechains
    print("Building sidechains...")
    full_structure = build_full_atom_chain(
        backbone, SIDECHAIN_TOPOLOGY, place_atom_nerf, sample_chi_angle
    )

    print("\n--- Structure Generation Complete ---")

    # --- Verification: list every placed atom for the branched/aromatic cases ---
    for res_index, expected in ((5, 'LEU'), (6, 'VAL'), (7, 'PHE')):
        residue = full_structure[res_index]
        print(f"\nVerifying {residue['name']} ({expected}):")
        for atom in residue['sidechain']:
            print(f" {atom['name']}: {np.round(atom['coord'], 2)}")

    # Glycine only gets an atom count.
    glycine = full_structure[2]
    print(f"\nVerifying {glycine['name']} (GLY):")
    print(f" Sidechain atoms: {len(glycine['sidechain'])}")
|
||||||
|
|
@ -2,9 +2,9 @@ from dunbrack import DunbrackRotamerLibrary
|
||||||
|
|
||||||
rl = DunbrackRotamerLibrary()
|
rl = DunbrackRotamerLibrary()
|
||||||
|
|
||||||
res_name = "LYS"
|
res_name = "PHE"
|
||||||
|
|
||||||
print(f"Rotamers for {res_name}")
|
print(f"Rotamers for {res_name}")
|
||||||
|
|
||||||
for rotamer in rl.rotamer_params(res_name, 100, 100):
|
for rotamer in rl.rotamer_params(res_name, 100, 100):
|
||||||
print(rotamer.p, rotamer.chis)
|
print(rotamer.p, rotamer.chis)
|
||||||
Loading…
Reference in New Issue
Block a user