Source code for pyqmmm.qm.plot_combined_nebs

"""Generalizable script for plotting the PES of an xyz trajectory with NEB boundaries."""

import matplotlib.pyplot as plt
import numpy as np
import csv
import time
import os
import re

HARTREE_TO_KCAL = 627.509

[docs]def get_xyz_files(): """ List all NEB xyz files in the current directory, sorted numerically. """ files = os.listdir('.') xyz_files = [f for f in files if re.match(r'^\d+\.xyz$', f)] xyz_files.sort(key=lambda x: int(re.match(r'(\d+)', x).group())) return xyz_files
[docs]def write_energies_to_csv(total_energies, filename="energy_plot_data.csv"): """ Write the energies to a CSV file. Parameters ---------- total_energies : list List of concatenated energies for all NEBs in kcal/mol. filename : str The name of the output CSV file. """ with open(filename, "w", newline="") as csvfile: writer = csv.writer(csvfile) writer.writerow(["Frame", "Energy (kcal/mol)"]) for i, energy in enumerate(total_energies): writer.writerow([i, energy])
[docs]def parse_energy(line, software): """ Parse the energy from a line based on the software used. Parameters ---------- line : str Line from the file containing energy information. software: str The software used for the calculation. Returns ------- float The energy extracted from the line, in Hartrees. """ if software == "ORCA-MEP" or software == "ORCA-IRC": energy_str = line.split()[5] elif software == "ORCA": energy_str = line.split()[4] elif software == "TeraChem-scan": energy_str = line.split()[4] elif software == "TeraChem-opt": energy_str = line.split()[0] else: raise ValueError(f"Unsupported software: {software}") return float(energy_str)
[docs]def get_trajectory_energies(filename, software): """ Parse the energies from an xyz trajectory file. Parameters ---------- filename : str Path to the trajectory file. software: str Software used for the calculation. Returns ------- tuple First value is a list of absolute energies for each frame, in kcal/mol. Second value is the first frame energy in kcal/mol. Third value is a list of absolute energies in Hartrees. """ energies_hartrees = [] with open(filename, "r") as f: while True: line = f.readline() if not line: break # end of file atom_count = int(line.strip()) # next line should contain energy energy_line = f.readline().strip() # parse and append energy energies_hartrees.append(parse_energy(energy_line, software)) # skip atom lines for _ in range(atom_count): f.readline() # Convert energies to kcal/mol (absolute energies) energies_kcal_abs = [e * HARTREE_TO_KCAL for e in energies_hartrees] first_energy_kcal = energies_kcal_abs[0] return energies_kcal_abs, first_energy_kcal, energies_hartrees
[docs]def identify_software(line): """ Identify the software used for the calculation from a line. Parameters ---------- line : str Line from the file. Returns ------- str Identifier of the software used for the calculation. """ if "ORCA-job qmscript_MEP" in line: return "ORCA-MEP" elif "ORCA-job qmscript_IRC_Full" in line: return "ORCA-IRC" elif "ORCA-job qmscript" in line: return "ORCA" elif "Converged Job" in line: return "TeraChem-scan" elif "TeraChem" in line: return "TeraChem-opt" else: raise ValueError(f"Could not identify software from line: {line}")
[docs]def format_plot() -> None: """ General plotting parameters. """ font = {"family": "sans-serif", "weight": "bold", "size": 10} plt.rc("font", **font) plt.rcParams["xtick.major.pad"] = 5 plt.rcParams["ytick.major.pad"] = 5 plt.rcParams["axes.linewidth"] = 2 plt.rcParams["xtick.major.size"] = 7 plt.rcParams["xtick.major.width"] = 2 plt.rcParams["ytick.major.size"] = 7 plt.rcParams["ytick.major.width"] = 2 plt.rcParams["xtick.direction"] = "in" plt.rcParams["ytick.direction"] = "in" plt.rcParams["xtick.top"] = True plt.rcParams["ytick.right"] = True plt.rcParams["svg.fonttype"] = "none"
[docs]def collect_data(): """ Collect energies from the NEB xyz files and calculate NEB boundaries. Returns ------- list A list of concatenated absolute energies for all NEBs in kcal/mol. float The first frame energy of the first NEB in kcal/mol. list A list of cumulative frame counts where each NEB ends. """ xyz_files = get_xyz_files() energies_by_file = {} first_energies = [] frames_per_neb = [] for idx, filename in enumerate(xyz_files): with open(filename, "r") as f: # read two lines to get to the software info f.readline() software_line = f.readline() software = identify_software(software_line) energies_kcal_abs, first_energy_kcal, energies_hartrees = get_trajectory_energies(filename, software) energies_by_file[filename] = energies_kcal_abs first_energies.append(first_energy_kcal) if idx < len(xyz_files) - 1: frames_per_neb.append(len(energies_kcal_abs) - 1) # Exclude last frame else: frames_per_neb.append(len(energies_kcal_abs)) # Include last frame for the last NEB # Compute cumulative frames to get NEB boundaries cumulative_frames = [0] for frames in frames_per_neb: cumulative_frames.append(cumulative_frames[-1] + frames) # NEB boundaries (excluding the final total frame count) neb_boundaries = cumulative_frames[1:-1] first_energy_kcal_of_first_neb = first_energies[0] # Concatenate energies, excluding duplicate frames total_energies_abs = [] for idx, filename in enumerate(xyz_files): energies = energies_by_file[filename] if idx < len(xyz_files) - 1: energies = energies[:-1] # Exclude last frame except for last NEB total_energies_abs.extend(energies) return total_energies_abs, first_energy_kcal_of_first_neb, neb_boundaries
[docs]def plot_data(total_energies_abs, first_energy_kcal_of_first_neb, dim_list, neb_boundaries): """ Plot the collected energies with NEB boundaries. Parameters ---------- total_energies_abs : list List of concatenated absolute energies for all NEBs in kcal/mol. first_energy_kcal_of_first_neb : float The first frame energy of the first NEB in kcal/mol. dim_list : list Dimensions for the plot. neb_boundaries : list List of cumulative frame counts where each NEB ends. """ format_plot() fig, ax = plt.subplots(figsize=(dim_list[0], dim_list[1])) # Adjust energies so that first frame is at 0 kcal/mol energies = [e - first_energy_kcal_of_first_neb for e in total_energies_abs] x_values = range(len(energies)) # Plotting the energy line ax.plot( x_values, energies, marker="o", linestyle="-", color='b', ) # Add vertical lines at NEB boundaries for boundary in neb_boundaries: ax.axvline(x=boundary, color='black', linestyle='-', lw=3, alpha=0.1, zorder=0) # Annotate the highest point max_energy = max(energies) max_energy_frame = energies.index(max_energy) ax.annotate(f"{max_energy:.1f}", (max_energy_frame, max_energy), textcoords="offset points", xytext=(0, 10), ha='center', va='bottom') # Extend y-axis to fit annotation y_lim = ax.get_ylim() ax.set_ylim(y_lim[0], y_lim[1] + (y_lim[1] - y_lim[0]) * 0.1) # Adding 10% padding at the top ax.set_xlabel("NEB frames", weight="bold") ax.set_ylabel("Relative energy (kcal/mol)", weight="bold") # Save the figure with enough padding extensions = ["png", "svg"] for ext in extensions: plt.savefig( f"energy_plot.{ext}", dpi=600, bbox_inches="tight", format=ext, )
[docs]def plot_energies(): """ Main function that combines previous functions to generate the plot. """ print("\n.---------------------------.") print("| WELCOME TO ENERGY PLOTTER |") print(".---------------------------.\n") print("> Generates a plot of an xyz trajectory.") print("> Can handle an arbitrary number of xyz trajectories") start_time = time.time() # Used to report the execution speed dim = input(" > What dimensions would you like the plot (e.g. 5,4)? ") if dim: x_dim = int(dim.split(",")[0]) y_dim = int(dim.split(",")[1]) dim_list = [x_dim, y_dim] else: dim_list = [4, 4] total_energies_abs, first_energy_kcal_of_first_neb, neb_boundaries = collect_data() write_energies_to_csv(total_energies_abs) # Write energies to CSV plot_data(total_energies_abs, first_energy_kcal_of_first_neb, dim_list, neb_boundaries) total_time = round(time.time() - start_time, 3) # Seconds to run the function job_summary = f""" --------------------------ENERGY PLOTTER END-------------------------- RESULT: Plotted energies for combined NEBs. OUTPUT: Created a plot called 'energy_plot.png' and a CSV file called 'energy_plot_data.csv' in the current directory. TIME: Total execution time: {total_time} seconds. --------------------------------------------------------------------\n """ print(job_summary)
if __name__ == "__main__": plot_energies()