"""Add backbone oxygens to a generated chain, export it as PDB, and plot it.

When run as a script, this file generates a backbone for a short test
sequence, builds side chains and hydrogens with the project helpers, adds the
carbonyl oxygens and writes the result to ``unfolded_protein.pdb``.
"""

import matplotlib.pyplot as plt
import numpy as np
# Not referenced by name below; kept in case an older matplotlib needs it to
# register the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

from backbone import generate_backbone_chain
from nerf import place_atom_nerf
from ramachandran import RamachandranSampler
from sidechain import SIDECHAIN_TOPOLOGY, build_full_atom_chain, sample_chi_angle

# Order in which the backbone atoms of each residue are written to the PDB.
_BACKBONE_ORDER = ('N', 'CA', 'C')

# (colour, marker size) used by the 'all_atom' plot, keyed by the first
# character of the atom name; anything else falls back to the default.
_ATOM_STYLES = {
    'H': ('lightgrey', 5),
    'O': ('red', 20),
    'N': ('blue', 20),
    'S': ('yellow', 30),
}
_DEFAULT_ATOM_STYLE = ('orange', 10)


def add_backbone_oxygens(chain, nerf_func):
    """Add a carbonyl oxygen atom named ``'O'`` to every residue of *chain*.

    For each residue the position is obtained from
    ``nerf_func(res['C'], res['CA'], res['N'], ...)`` using a 1.23 A bond
    length (C=O double bond), a 121 degree bond angle and a 180 degree
    torsion (planar, trans to the N-CA bond).

    Args:
        chain: Iterable of residue dicts holding at least the keys ``'N'``,
            ``'CA'`` and ``'C'``. Each dict is modified in place.
        nerf_func: Callable taking three reference coordinates positionally
            plus the keywords ``bond_length``, ``bond_angle_deg`` and
            ``torsion_angle_deg``, and returning the new atom's coordinate.

    Returns:
        The same *chain* object that was passed in.

    Note:
        The oxygen is inserted unconditionally, so calling this function
        twice on the same chain adds a second ``'O'`` entry per residue.
    """
    for res in chain:
        o_coord = nerf_func(
            res['C'],
            res['CA'],
            res['N'],
            bond_length=1.23,
            bond_angle_deg=121.0,
            torsion_angle_deg=180.0,
        )
        # The oxygen lives in the 'sidechain' list for convenience; putting it
        # first makes it follow C directly when the residue is written out.
        res.setdefault('sidechain', []).insert(0, {'name': 'O', 'coord': o_coord})
    return chain


def format_atom_name(name):
    """Return *name* laid out for the four-character PDB atom-name field.

    - Four-character names (e.g. ``HD21``) are returned unchanged and fill
      columns 13-16.
    - Any other name is prefixed with one space and left-justified to three
      characters, so short names (``CA``, ``N``, ``O``) start at column 14.

    Names longer than four characters are not truncated; they would yield a
    field wider than four characters.
    """
    if len(name) == 4:
        return name
    return f" {name:<3}"  # e.g. " CA "


def _element_symbol(name):
    """Return the first alphabetic character of *name* as the element symbol.

    Skipping leading non-letters keeps digit-prefixed names such as ``1HB``
    from producing a digit in the element column. If *name* contains no
    letter at all, its first character (or ``''``) is returned.
    """
    for char in name:
        if char.isalpha():
            return char
    return name[:1]


def save_to_pdb(chain, filename="unfolded_structure.pdb"):
    """Write *chain* to *filename* as fixed-width PDB ``ATOM`` records.

    Every residue is written as N, CA, C followed by the entries of its
    optional ``'sidechain'`` list (which is where ``add_backbone_oxygens``
    stores O). All atoms go to chain ``A``, residues are numbered from 1,
    atom serials from 1, occupancy is 1.00 and the temperature factor 0.00.

    Args:
        chain: Iterable of residue dicts with the keys ``'name'``, ``'N'``,
            ``'CA'``, ``'C'`` and optionally ``'sidechain'`` (a list of
            ``{'name': ..., 'coord': ...}`` dicts). Coordinates must unpack
            into three numbers.
        filename: Path of the file to create or overwrite.
    """
    chain_id = 'A'
    atom_serial = 1
    with open(filename, 'w') as f:
        f.write("REMARK 1 GENERATED BY UNFOLDED POLYPEPTIDE GENERATOR\n")

        for res_seq, res in enumerate(chain, start=1):
            res_name = res['name']

            # Backbone first, then everything stored under 'sidechain'.
            atoms = [{'name': key, 'coord': res[key]} for key in _BACKBONE_ORDER]
            atoms.extend(res.get('sidechain', []))

            for atom in atoms:
                name = atom['name']
                x, y, z = atom['coord']
                element = _element_symbol(name)

                # PDB fixed-width layout (1-based columns):
                #   1-6   "ATOM  "        7-11  serial
                #   13-16 atom name       18-20 residue name
                #   22    chain id        23-26 residue number
                #   31-54 x, y, z         55-60 occupancy
                #   61-66 temp factor     77-78 element
                # The literal padding below is significant: each run of
                # spaces fills the unused columns between two fields.
                pdb_line = (
                    f"ATOM  {atom_serial:>5} {format_atom_name(name):<4} "
                    f"{res_name:>3} {chain_id:>1}{res_seq:>4}    "
                    f"{x:>8.3f}{y:>8.3f}{z:>8.3f}"
                    f"{1.00:>6.2f}{0.00:>6.2f}          {element:>2}\n"
                )
                f.write(pdb_line)
                atom_serial += 1

        f.write("END\n")
    print(f"Successfully saved PDB to: {filename}")


def visualize_structure(chain, mode='backbone'):
    """Show a 3D matplotlib plot of *chain*.

    A grey line through the CA atoms is always drawn. On top of it:

    - ``'ca_trace'``: the CA atoms as blue markers.
    - ``'backbone'``: N (blue), CA (green) and C (red) of every residue.
    - ``'all_atom'``: the backbone atoms plus every ``'sidechain'`` entry,
      coloured by the first letter of the atom name.

    Any other *mode* value draws only the grey trace.

    Args:
        chain: Sequence of residue dicts with ``'N'``, ``'CA'``, ``'C'``
            coordinates and optionally a ``'sidechain'`` list.
        mode: One of the mode names listed above.
    """
    fig = plt.figure(figsize=(12, 10))
    ax = fig.add_subplot(111, projection='3d')

    # CA coordinates, split per axis, for the main trace.
    ca_x = [res['CA'][0] for res in chain]
    ca_y = [res['CA'][1] for res in chain]
    ca_z = [res['CA'][2] for res in chain]
    ax.plot(ca_x, ca_y, ca_z, color='grey', linewidth=2, label='Backbone Trace')

    if mode == 'ca_trace':
        ax.scatter(ca_x, ca_y, ca_z, s=50, c='blue', label='CA')

    elif mode == 'backbone':
        for res in chain:
            ax.scatter(*res['N'], c='blue', s=30)
            ax.scatter(*res['CA'], c='green', s=30)
            ax.scatter(*res['C'], c='red', s=30)

    elif mode == 'all_atom':
        for res in chain:
            ax.scatter(*res['N'], c='blue', s=20, alpha=0.6)
            ax.scatter(*res['CA'], c='green', s=20, alpha=0.6)
            ax.scatter(*res['C'], c='red', s=20, alpha=0.6)

            for atom in res.get('sidechain', []):
                coord = atom['coord']
                # name[:1] (not name[0]) so an empty name gets the default.
                color, size = _ATOM_STYLES.get(atom['name'][:1], _DEFAULT_ATOM_STYLE)
                ax.scatter(coord[0], coord[1], coord[2], c=color, s=size)

    # Raw strings: '\A' is not a valid Python escape sequence and triggers a
    # SyntaxWarning on Python 3.12+ when written in a normal string literal.
    ax.set_xlabel(r'X ($\AA$)')
    ax.set_ylabel(r'Y ($\AA$)')
    ax.set_zlabel(r'Z ($\AA$)')
    ax.set_title(f'Generated Structure ({len(chain)} residues) - Mode: {mode}')
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # --- 1. INITIALIZATION ---
    sampler = RamachandranSampler()

    # --- 2. DEFINE SEQUENCE ---
    # A short peptide with diverse chemistry for electrostatic testing
    # (charged, polar, hydrophobic and glycine residues).
    # Append "* 1000" to the list to stress-test with a long chain.
    sequence = ['MET', 'LYS', 'ASP', 'GLY', 'PHE', 'ARG', 'GLU', 'VAL', 'HIS', 'ALA']
    print(f"Generating unfolded structure for: {'-'.join(sequence)}")

    # --- 3. GENERATE BACKBONE (N, CA, C) ---
    backbone, total_backtracks = generate_backbone_chain(
        sequence, place_atom_nerf, sampler, verbose=True
    )

    # --- 4. BUILD SIDECHAINS & PROTONS ---
    full_struct = build_full_atom_chain(
        backbone, SIDECHAIN_TOPOLOGY, place_atom_nerf, sample_chi_angle
    )

    # --- 5. ADD CARBONYL OXYGENS (O on the backbone C) ---
    full_struct = add_backbone_oxygens(full_struct, place_atom_nerf)

    # --- 6. EXPORT TO PDB ---
    save_to_pdb(full_struct, filename="unfolded_protein.pdb")

    # --- 7. VISUALIZE ---
    # Uncomment to plot; 'all_atom' shows the hydrogens and sidechains.
    # visualize_structure(full_struct, mode='all_atom')