Source code for pyqmmm.qm.residue_decomposition

"""Extract and compare energies from a residue decomposition analysis"""

import os
import re
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator

[docs]def natural_sort(s):
    """
    Splits the input string on digits and returns a list with 
    integers for digit components and lower-case strings for others.

    Parameters
    ----------
    s : str
        Input string to be split.

    Returns
    -------
    list
        List of integers and lower-case strings.

    """
    return [int(text) if text.isdigit() else text.lower()
            for text in re.split(r'(\d+)', s)]


[docs]def get_energy(filename):
    """
    Extracts the final energy from the input file.

    Parameters
    ----------
    filename : str
        Path to the file.

    Returns
    -------
    float
        Final energy found in the file, or None if not found.

    """
    final_energy = None
    with open(filename, 'r') as file:
        for line in file:
            if "FINAL ENERGY:" in line:
                final_energy = float(line.split()[2])
    return final_energy


[docs]def process_directory(path, ignore):
    """
    Processes directories, finding 'qmscript.out' files and retrieving energies.

    Parameters
    ----------
    path : str
        Directory to process.
    ignore : list
        List of directory names to ignore.

    Returns
    -------
    dict
        Dictionary of {dir_name: energy} pairs.
    
    """
    energies = {}
    paths_to_process = []
    for root, dirs, files in os.walk(path):
        if 'qmscript.out' in files and os.path.basename(root) not in ignore:
            paths_to_process.append(root)
            
    for path in paths_to_process:
        dir_name = os.path.basename(path)
        energy = get_energy(os.path.join(path, 'qmscript.out'))
        if energy:
            energies[dir_name] = energy
    return energies


[docs]def plot_data(df):
    """
    Plots data from the DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the data to plot.

    """
    color_codes = [
        "#FF5733",  # Bright Red Orange
        "#8AD7A5",  # Soft Mint Green
        "#FFD700",  # Golden Yellow
        "#6B2C91",  # Deep Violet
        "#4A8F79",  # Vintage Teal
        "#F08080",  # Light Coral
        "#50B4B7",  # Aqua Marine
        "#A9A9A9",  # Dark Gray
        "#6D9EEB",  # Dodger Blue
        "#C27BA0",  # Antique Fuchsia
        "#1D2951",  # Oxford Blue
        "#F4D03F",  # Naples Yellow
        "#64C4ED",  # Baby Blue Eyes
        "#E67F83",  # English Vermillion
        "#D99058",  # Lion
        "#228B22",  # Forest Green
        "#FF69B4",  # Bright Pink
        "#32CD32",  # Lime Green
        "#4169E1",  # Royal Blue
        "#DC143C",  # Crimson
        "#D2691E",  # Chocolate Brown
    ]
    fig, ax = plt.subplots()
    ax.axhline(0, color='silver', linewidth=1.5, zorder=1)  # Increased linewidth to 1.5

    for i, (index, row) in enumerate(df.iterrows()):
        legend_label = index.split('_')[1] if '_' in index else index
        if legend_label == 'all':
            ax.plot(row.index, row, label=legend_label, color='black', linewidth=2, zorder=3)  # Draw 0_all trace as bold black line
        else:
            ax.plot(row.index, row, label=legend_label, color=color_codes[i % len(color_codes)], zorder=2)
            
    # Position the legend outside the plot on the right-hand side
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_xlabel('pathway intermediates', weight="bold")
    ax.set_ylabel('relative energy (kcal/mol)', weight="bold")
    plt.xticks(rotation=90)
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))  # This will add 1 minor tick between each pair of major ticks
    plt.tight_layout()
    plt.savefig('energies_plot.svg', bbox_inches='tight')
    plt.savefig('energies_plot.png', dpi=300, bbox_inches='tight')

[docs]def format_plot() -> None:
    """
    General plotting parameters for the Kulik Lab.

    """
    font = {"family": "sans-serif", "weight": "bold", "size": 10}
    plt.rc("font", **font)
    plt.rcParams["ytick.minor.size"] = 4
    plt.rcParams["ytick.minor.width"] = 2
    plt.rcParams["xtick.major.pad"] = 5
    plt.rcParams["ytick.major.pad"] = 5
    plt.rcParams["axes.linewidth"] = 2
    plt.rcParams["xtick.major.size"] = 7
    plt.rcParams["xtick.major.width"] = 2
    plt.rcParams["ytick.major.size"] = 7
    plt.rcParams["ytick.major.width"] = 2
    plt.rcParams["xtick.direction"] = "in"
    plt.rcParams["ytick.direction"] = "in"
    plt.rcParams["xtick.top"] = True
    plt.rcParams["ytick.right"] = True
    plt.rcParams["svg.fonttype"] = "none"

[docs]def residue_decomposition():
    """
    Performs the residue decomposition analysis, reading in energy 
    data from files, processing, plotting and saving the results.
    
    """
    ignore = ['0_prep']
    kcal_per_hartree = 627.509  # Replace with the correct conversion factor

    base_path = './'  # Replace with the correct path to the base directory
    data = {}
    for folder_name in sorted(os.listdir(base_path), key=natural_sort):
        if folder_name in ignore:
            continue
        folder_path = os.path.join(base_path, folder_name)
        if os.path.isdir(folder_path):
            data[folder_name] = process_directory(folder_path, ignore)

    df = pd.DataFrame(data)
    df = df.applymap(lambda x: x * kcal_per_hartree if pd.notnull(x) else x)
    
    # Make energies relative to the first energy in each row
    df = df.subtract(df.iloc[:, 0], axis=0)

    # Remove prefix from column names for x-axis labels
    df.columns = [name.split('_', 1)[1] if '_' in name else name for name in df.columns]
    df.columns = ['${\mathrm{TS}}_{\mathrm{' + name[:-3] + '}}$' if name.endswith('_TS') else name for name in df.columns]

    format_plot()
    plot_data(df)
    df.to_csv('energies.csv')

if __name__ == '__main__':
    residue_decomposition()