Source code for pyqmmm.qm.eda_analyze

import os
import re
import csv
import matplotlib.pyplot as plt

# Conversion factor
kj_to_kcal = 0.239006


[docs]def extract_energies(file_path): energies = {} with open(file_path, "r") as f: lines = f.readlines() # Extract energies from the "Decomposition of frozen interaction energy" section for idx, line in enumerate(lines): if "Decomposition of frozen interaction energy" in line: try: energies["Electrostatics"] = ( float( re.search( r"E_cls_elec\(solv\)\s+\(kJ/mol\) = ([-+]?[\d.]+)", lines[idx + 11], ).group(1) ) * kj_to_kcal ) except AttributeError: print( f"Couldn't find Electrostatics energy in {file_path}. Line content: {lines[idx+11]}" ) energies["Electrostatics"] = 0 try: energies["Repulsion"] = ( float( re.search( r"E_mod_pauli\s+\(MOD PAULI\) \(kJ/mol\) = ([-+]?[\d.]+)", lines[idx + 6], ).group(1) ) * kj_to_kcal ) except AttributeError: print( f"Couldn't find Repulsion energy in {file_path}. Line content: {lines[idx+6]}" ) energies["Repulsion"] = 0 try: energies["Dispersion"] = ( float( re.search( r"E_cls_disp\s+\(CLS DISP\)\s+\(kJ/mol\) = ([-+]?[\d.]+)", lines[idx + 7], ).group(1) ) * kj_to_kcal ) except AttributeError: print( f"Couldn't find Dispersion energy in {file_path}. Line content: {lines[idx+7]}" ) energies["Dispersion"] = 0 if "Simplified EDA Summary (kJ/mol)" in line: try: energies["SOLVATION"] = ( float( re.search( r"SOLVATION\s+([-+]?[\d.]+)", lines[idx + 3] ).group(1) ) * kj_to_kcal ) except AttributeError: print( f"Couldn't find SOLVATION energy in {file_path}. Line content: {lines[idx+3]}" ) energies["SOLVATION"] = 0 try: energies["POLARIZATION"] = ( float( re.search( r"POLARIZATION\s+([-+]?[\d.]+)", lines[idx + 6] ).group(1) ) * kj_to_kcal ) except AttributeError: print( f"Couldn't find POLARIZATION energy in {file_path}. Line content: {lines[idx+6]}" ) energies["POLARIZATION"] = 0 try: energies["CHARGE TRANSFER"] = ( float( re.search( r"CHARGE TRANSFER\s+([-+]?[\d.]+)", lines[idx + 7] ).group(1) ) * kj_to_kcal ) except AttributeError: print( f"Couldn't find CHARGE TRANSFER energy in {file_path}. Line content: {lines[idx+7]}" ) energies["CHARGE TRANSFER"] = 0 return energies
[docs]def format_plot() -> None: """ General plotting parameters for the Kulik Lab. """ font = {"family": "sans-serif", "weight": "bold", "size": 10} plt.rc("font", **font) plt.rcParams["xtick.major.pad"] = 5 plt.rcParams["ytick.major.pad"] = 5 plt.rcParams["axes.linewidth"] = 2 plt.rcParams["xtick.major.size"] = 7 plt.rcParams["xtick.major.width"] = 2 plt.rcParams["ytick.major.size"] = 7 plt.rcParams["ytick.major.width"] = 2 plt.rcParams["xtick.direction"] = "in" plt.rcParams["ytick.direction"] = "in" plt.rcParams["xtick.top"] = True plt.rcParams["ytick.right"] = True plt.rcParams["svg.fonttype"] = "none"
[docs]def main(): # Ask for residue name from user residue_name = input("What residue is this for (e.g., His12)? ") # Collecting all folder names in the current directory and sorting based on the prefix number folder_names = sorted( [folder for folder in os.listdir() if os.path.isdir(folder)], key=lambda x: int(x.split("_")[0]), ) # Lists to store results intermediates = [] solvation_energies = [] electrostatics_energies = [] repulsion_energies = [] dispersion_energies = [] polarization_energies = [] charge_transfer_energies = [] for folder in folder_names: intermediate_name = folder.split("_")[1] intermediates.append(intermediate_name) # Find the desired .out file in the folder file_name = [ file for file in os.listdir(folder) if "qmscript" in file and file.endswith(".out") ] if not file_name: print(f"No matching .out file found in folder {folder}. Skipping...") continue file_path = os.path.join(folder, file_name[0]) # Extract energies energies = extract_energies(file_path) solvation_energies.append(energies.get("SOLVATION", 0)) electrostatics_energies.append(energies.get("Electrostatics", 0)) repulsion_energies.append(energies.get("Repulsion", 0)) dispersion_energies.append(energies.get("Dispersion", 0)) polarization_energies.append(energies.get("POLARIZATION", 0)) charge_transfer_energies.append(energies.get("CHARGE TRANSFER", 0)) # Plotting format_plot() plt.figure(figsize=(6, 5)) # plt.plot( # intermediates, solvation_energies, marker="o", label="solvation", color="b" # ) plt.plot( intermediates, electrostatics_energies, marker="o", label="electrostatics", color="orange", ) plt.plot( intermediates, repulsion_energies, marker="o", label="repulsion", color="r" ) plt.plot( intermediates, dispersion_energies, marker="o", label="dispersion", color="b" ) plt.plot( intermediates, polarization_energies, marker="o", label="polarization", color="g", ) plt.plot( intermediates, charge_transfer_energies, marker="o", label="charge transfer", color="purple", ) plt.xlabel("intermediates", weight="bold") plt.ylabel(f"{residue_name} energy (kcal/mol)", weight="bold") plt.legend( bbox_to_anchor=(1.04, 0.8), loc="center left", borderaxespad=0, frameon=False ) plt.xticks(rotation=90) # Save figures plt.savefig(f"ALMO-EDA_{residue_name}.png", bbox_inches="tight", dpi=300) plt.savefig(f"ALMO-EDA_{residue_name}.svg", bbox_inches="tight") # Writing to CSV with open("EDA_results.csv", "w", newline="") as csvfile: writer = csv.writer(csvfile) writer.writerow( [ "intermediate", "solvation", "electrostatics", "repulsion", "dispersion", "polarization", "charge transfer", ] ) for i in range(len(intermediates)): writer.writerow( [ intermediates[i], solvation_energies[i], electrostatics_energies[i], repulsion_energies[i], dispersion_energies[i], polarization_energies[i], charge_transfer_energies[i], ] )
if __name__ == "__main__": main()