# primacito/src/legacy/export.py

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from backbone import generate_backbone_chain
from nerf import place_atom_nerf
from ramachandran import RamachandranSampler
from sidechain import SIDECHAIN_TOPOLOGY, build_full_atom_chain, sample_chi_angle

def add_backbone_oxygens(chain, nerf_func):
    """
    Adds the Carbonyl Oxygen (O) to every residue, in place.

    For each residue, nerf_func is called with the residue's 'C', 'CA' and 'N'
    coordinates (in that order) plus a fixed idealised geometry, and the
    result is stored as {'name': 'O', 'coord': ...} at the front of the
    residue's 'sidechain' list (the list is created when missing).

    Args:
        chain: Iterable of residue dicts holding 'C', 'CA' and 'N' entries.
        nerf_func: Callable taking three reference coordinates and the keyword
            arguments bond_length, bond_angle_deg and torsion_angle_deg, and
            returning the new atom's coordinate.

    Returns:
        The same chain object that was passed in.
    """
    # Idealised carbonyl geometry: 1.23 A double bond, ~121 degree angle and
    # a planar (180 degree) torsion, i.e. trans to the N-CA bond.
    carbonyl_geometry = {
        'bond_length': 1.23,
        'bond_angle_deg': 121.0,
        'torsion_angle_deg': 180.0,
    }

    for residue in chain:
        # Reference atoms: C (parent), CA (grand), N (great-grand).
        oxygen = {
            'name': 'O',
            'coord': nerf_func(
                residue['C'], residue['CA'], residue['N'],
                **carbonyl_geometry
            ),
        }

        # O lives in the sidechain list for convenience in plotting/writing;
        # putting it first makes it appear right after C in the PDB output.
        residue.setdefault('sidechain', []).insert(0, oxygen)

    return chain

def format_atom_name(name):
    """
    Pads an atom name for the 4-character PDB atom-name field (cols 13-16).

    - Names of exactly 4 chars (HD21) are returned unchanged.
    - Any other name gets one leading space and is left-justified to at
      least 3 chars, so short names (CA, N, O) start at col 14: "CA" -> " CA ".

    Names longer than 4 chars are not truncated.
    """
    fills_field = len(name) == 4
    return name if fills_field else " " + name.ljust(3)

def save_to_pdb(chain, filename="unfolded_structure.pdb"):
    """
    Writes the chain to a PDB format file as fixed-width ATOM records.

    Args:
        chain: Iterable of residue dicts. Each residue supplies its residue
            name under 'name' and backbone coordinates (x, y, z) under 'N',
            'CA' and 'C'. An optional 'sidechain' list of
            {'name': ..., 'coord': ...} dicts is written after the backbone,
            in list order.
        filename: Destination path; an existing file is overwritten.

    All residues are written to chain 'A' and numbered from 1 in iteration
    order. Occupancy is written as 1.00 and the temperature factor as 0.00.

    The atom serial (5 columns) and residue number (4 columns) are wrapped
    modulo 100000 and 10000 respectively, so a very long chain cannot widen
    a field and shift the coordinate columns. Numbers repeat in such files.
    """
    # Capacities of the fixed-width numeric fields of an ATOM record.
    serial_wrap = 100000   # Serial, cols 7-11
    res_seq_wrap = 10000   # SeqNum, cols 23-26

    # Backbone atoms are always written first, in this order.
    bb_order = ('N', 'CA', 'C')
    chain_id = 'A'
    atom_serial = 1

    with open(filename, 'w', encoding='utf-8') as f:
        f.write("REMARK   1 GENERATED BY UNFOLDED POLYPEPTIDE GENERATOR\n")

        for res_seq, res in enumerate(chain, start=1):
            res_name = res['name']

            # Backbone first, then sidechain entries (which may include O, H,
            # etc. if earlier pipeline steps stored them there).
            all_atoms = [{'name': key, 'coord': res[key]} for key in bb_order]
            if 'sidechain' in res:
                all_atoms.extend(res['sidechain'])

            for atom in all_atoms:
                name = atom['name']
                x, y, z = atom['coord']

                # Element = first letter of the atom name. Leading digits are
                # skipped so a name such as "1HB" yields "H" rather than "1";
                # names without any letter fall back to their first character.
                element = next((ch for ch in name if ch.isalpha()), name[0])

                # PDB fixed-width format
                # ATOM  (1-6)
                # Serial (7-11)
                # Name (13-16)
                # ResName (18-20)
                # Chain (22)
                # SeqNum (23-26)
                # X, Y, Z (31-54)
                # Occ (55-60), Temp (61-66)
                # Element (77-78)
                f.write(
                    f"ATOM  {atom_serial % serial_wrap:>5} {format_atom_name(name):<4} {res_name:>3} "
                    f"{chain_id:>1}{res_seq % res_seq_wrap:>4}    "
                    f"{x:>8.3f}{y:>8.3f}{z:>8.3f}"
                    f"{1.00:>6.2f}{0.00:>6.2f}          {element:>2}\n"
                )
                atom_serial += 1

        f.write("END\n")
    print(f"Successfully saved PDB to: {filename}")

def visualize_structure(chain, mode='backbone'):
    """
    Visualizes the protein structure in a 3D matplotlib figure.

    A grey line through the CA atoms is always drawn; `mode` selects what is
    scattered on top of it:
      - 'ca_trace': the CA atoms only.
      - 'backbone': N (blue), CA (green) and C (red) of every residue.
      - 'all_atom': the backbone atoms plus every entry of each residue's
        optional 'sidechain' list, styled by the first letter of its name.
    Any other value leaves only the trace line.

    Args:
        chain: Sequence of residue dicts with 'N', 'CA' and 'C' coordinates
            (x, y, z) and, optionally, a 'sidechain' list of
            {'name': ..., 'coord': ...} dicts.
        mode: One of the mode strings above.

    The figure is displayed with plt.show(); nothing is returned.
    """
    # Backbone atoms and their marker colours, in drawing order.
    backbone_colors = (('N', 'blue'), ('CA', 'green'), ('C', 'red'))

    # (colour, marker size) for sidechain-list atoms, keyed by the first
    # letter of the atom name; everything else uses the default style.
    sidechain_styles = {
        'H': ('lightgrey', 5),
        'O': ('red', 20),
        'N': ('blue', 20),
        'S': ('yellow', 30),
    }
    default_style = ('orange', 10)

    fig = plt.figure(figsize=(12, 10))
    ax = fig.add_subplot(111, projection='3d')

    # Extract CA coords for the main trace
    ca_x = [res['CA'][0] for res in chain]
    ca_y = [res['CA'][1] for res in chain]
    ca_z = [res['CA'][2] for res in chain]

    # Plot the main chain line
    ax.plot(ca_x, ca_y, ca_z, color='grey', linewidth=2, label='Backbone Trace')

    if mode == 'ca_trace':
        ax.scatter(ca_x, ca_y, ca_z, s=50, c='blue', label='CA')

    elif mode == 'backbone':
        for res in chain:
            for key, color in backbone_colors:
                ax.scatter(*res[key], c=color, s=30)

    elif mode == 'all_atom':
        for res in chain:
            # Backbone, drawn smaller and translucent so sidechains stand out
            for key, color in backbone_colors:
                ax.scatter(*res[key], c=color, s=20, alpha=0.6)

            if 'sidechain' in res:
                for atom in res['sidechain']:
                    coord = atom['coord']
                    color, size = sidechain_styles.get(
                        atom['name'][:1], default_style
                    )
                    ax.scatter(coord[0], coord[1], coord[2], c=color, s=size)

    # Aesthetics. Raw strings: "\A" is not a valid Python escape sequence, so
    # the mathtext angstrom symbol must not go through escape processing.
    ax.set_xlabel(r'X ($\AA$)')
    ax.set_ylabel(r'Y ($\AA$)')
    ax.set_zlabel(r'Z ($\AA$)')
    ax.set_title(f'Generated Structure ({len(chain)} residues) - Mode: {mode}')
    plt.legend()
    plt.show()

if __name__ == "__main__":
    # Demo pipeline: sample a backbone, decorate it with sidechains and
    # carbonyl oxygens, then export the result as a PDB file.

    # --- 1. INITIALIZATION ---
    phi_psi_sampler = RamachandranSampler()

    # --- 2. DEFINE SEQUENCE ---
    # A short peptide with diverse chemistry for electrostatic testing
    # (charged, polar, hydrophobic, glycine). Repeat the list
    # (e.g. `* 1000`) to stress-test with a long chain.
    residue_names = "MET LYS ASP GLY PHE ARG GLU VAL HIS ALA".split()

    dashed_sequence = '-'.join(residue_names)
    print(f"Generating unfolded structure for: {dashed_sequence}")

    # --- 3. GENERATE BACKBONE ---
    # (Generates N, CA, C). The backtrack count is returned but not used here.
    backbone_chain, _backtrack_count = generate_backbone_chain(
        residue_names, place_atom_nerf, phi_psi_sampler, verbose=True
    )

    # --- 4. BUILD SIDECHAINS & PROTONS ---
    structure = build_full_atom_chain(
        backbone_chain, SIDECHAIN_TOPOLOGY, place_atom_nerf, sample_chi_angle
    )

    # --- 5. ADD CARBONYL OXYGENS ---
    # (Generates O on the backbone C)
    structure = add_backbone_oxygens(structure, place_atom_nerf)

    # --- 6. EXPORT TO PDB ---
    save_to_pdb(structure, filename="unfolded_protein.pdb")

    # --- 7. VISUALIZE ---
    # Disabled by default; 'all_atom' shows the hydrogens and sidechains.
    # To enable: visualize_structure(structure, mode='all_atom')