Source code for pyqmmm.qm.create_mep_trj

import re

# Header ("REACTANT", "PRODUCT" or "IMAGE <n> (<unit>)"), its dashed underline,
# then one or more atom lines: an element symbol of one or two letters followed
# by three decimal numbers. Group 3 holds the raw atom lines.
_COORDINATE_PATTERN = re.compile(
    r'(REACTANT|PRODUCT|IMAGE \d+ \((ANGSTROEM|BOHR)\))\n-+\n((?:[A-Z][a-z]?\s+-?\d+\.\d+\s+-?\d+\.\d+\s+-?\d+\.\d+\s*\n)+)'
)

# A summary-table row: integer index, one arbitrary token, then the captured
# decimal number that is written out as the frame energy.
_ENERGY_PATTERN = re.compile(r'\s+\d+\s+\S+\s+(-?\d+\.\d+)')

# Horizontal rule that delimits the text taken from the "PATH SUMMARY" section.
_PATH_SUMMARY_RULE = '---------------------------------------------------------------'


def create_neb_mep_trj_from_out(
    orca_output_file: str = 'orca.out',
    output_xyz_file: str = 'qmscript_MEP_trj.xyz',
) -> str:
    """Build a multi-frame XYZ trajectory from an ORCA output file.

    Coordinate blocks are collected from the whole file, and energies are
    read from the text that follows the first ``PATH SUMMARY`` marker, between
    the first and second dashed rules after it. Each frame is written as an
    atom count, a title line carrying the energy, and the atom lines exactly
    as they appear in the input.

    Parameters
    ----------
    orca_output_file
        Path of the ORCA output file to read. Defaults to ``'orca.out'`` in
        the current working directory.
    output_xyz_file
        Path of the XYZ file to write. Defaults to ``'qmscript_MEP_trj.xyz'``
        in the current working directory. An existing file is overwritten.

    Returns
    -------
    str
        ``output_xyz_file``, i.e. the path that was written.

    Raises
    ------
    OSError
        If the input cannot be opened or the output cannot be written.
    IndexError
        If the ``PATH SUMMARY`` marker or the dashed rule after it is
        missing, or if no coordinate block matches.
    AssertionError
        If the number of frames differs from the number of energies.
    """
    # The patterns only match ASCII, so undecodable bytes elsewhere in the log
    # are replaced instead of aborting the whole parse.
    with open(orca_output_file, 'r', encoding='utf-8', errors='replace') as handle:
        orca_output = handle.read()

    coordinate_blocks = [
        match.group(3) for match in _COORDINATE_PATTERN.finditer(orca_output)
    ]

    sections = orca_output.split('PATH SUMMARY')
    if len(sections) < 2:
        raise IndexError(f"No 'PATH SUMMARY' section found in {orca_output_file!r}.")
    table_parts = sections[1].split(_PATH_SUMMARY_RULE)
    if len(table_parts) < 2:
        raise IndexError(
            f"No dashed rule follows 'PATH SUMMARY' in {orca_output_file!r}."
        )
    energies = _ENERGY_PATTERN.findall(table_parts[1].strip())

    if not coordinate_blocks:
        raise IndexError(f"No coordinate blocks found in {orca_output_file!r}.")

    # The first and last matched blocks are repeated so that the trajectory
    # gets one extra frame at each end, standing in for reactant and product.
    # NOTE(review): this presumes the pattern did not already capture the
    # reactant/product geometries themselves -- confirm against real output.
    frames = [coordinate_blocks[0], *coordinate_blocks, coordinate_blocks[-1]]

    # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
    if len(frames) != len(energies):
        raise AssertionError("Mismatch between number of coordinates and energies.")

    chunks = []
    for coord_data, energy in zip(frames, energies):
        atom_lines = coord_data.strip().split('\n')
        chunks.append(f"{len(atom_lines)}\n")
        chunks.append(f"Coordinates from ORCA-job qmscript_MEP E {energy}\n")
        chunks.extend(line + '\n' for line in atom_lines)

    # Nothing is written (and no existing file truncated) unless parsing and
    # validation above succeeded.
    with open(output_xyz_file, 'w', encoding='utf-8') as handle:
        handle.write(''.join(chunks))

    return output_xyz_file