Source code for pyqmmm.md.cluster_frame_indexer

"""This script will find all frames in the main cluster and condense them."""


def get_clusters(file):
    """
    Collect the indices of all frames assigned to cluster 0.

    Opens the CPPTraj cnumvtime.dat file, which contains every frame and
    its assigned cluster, and gathers the frames in cluster 0, which
    (per the original documentation) is always the predominant cluster.

    Parameters
    ----------
    file : str
        The file containing the cluster assignments for every frame.
        The first line is treated as a header and skipped; each remaining
        non-blank line must start with two whitespace-separated integers
        (frame number, cluster number).

    Returns
    -------
    cluster_list : list
        A list of all the frame indices from cluster 0, in file order.
        Empty when the file contains no data rows.

    Raises
    ------
    ValueError
        If a data row's first two fields are not integers.
    IndexError
        If a non-blank data row has fewer than two fields.
    """
    cluster_list = []
    with open(file, "r") as cluster_file:
        # Skip first line as it is just text; the default value keeps an
        # empty file from raising StopIteration.
        next(cluster_file, None)
        for line in cluster_file:
            fields = line.split()
            # Tolerate blank lines such as a trailing newline at end of file
            if not fields:
                continue
            frame_num = int(fields[0])
            cluster_num = int(fields[1])
            if cluster_num == 0:
                cluster_list.append(frame_num)
    return cluster_list
def condense_numbering(cluster_list):
    """
    Condense a list of frame indices into a compact range string.

    Consecutive values (each exactly one greater than the previous) are
    merged into ``start-end``; values that do not continue a run are
    listed on their own. Entries are joined with commas in input order.

    Parameters
    ----------
    cluster_list : list
        Frame indices to condense.

    Returns
    -------
    final_selection : str
        The condensed selection, e.g. ``"2,5-7,9"`` for ``[2, 5, 6, 7, 9]``.
        An empty list yields an empty string.
    """
    # Group the input into runs where every value is previous + 1
    runs = []
    for frame in cluster_list:
        if runs and runs[-1][-1] + 1 == frame:
            runs[-1].append(frame)
        else:
            runs.append([frame])

    # A run of one value is printed alone; longer runs as "first-last"
    pieces = []
    for run in runs:
        if len(run) > 1:
            pieces.append(str(run[0]) + "-" + str(run[-1]))
        else:
            pieces.append(str(run[0]))

    return ",".join(pieces)
def main():
    """
    Print a condensed selection of the frames belonging to cluster 0.

    Prints a banner and usage notes, reads ``cnumvtime.dat`` from the
    current working directory, then reports how many frames were found in
    cluster 0 and their indices condensed into a single string.

    Examples
    --------
    The original documentation pairs this utility with a CPPTraj input of
    the following form, where the ``onlyframes`` value has the same format
    as the printed selection:

    >> parm welo5_solv.prmtop
    >> trajin constP_prod.mdcrd
    >> trajout output.mdcrd onlyframes 2,5-200,202-400
    """
    # Provide the user with general info about this utility
    banner_and_notes = (
        "\n.-----------------------.",
        "| CLUSTER FRAME INDEXER |",
        ".-----------------------.\n",
        "Run this script in the same directory as cnumvtime.dat.",
        "The script prints the frame indices.\n",
        "It will also provide an interval if you want only a subset.\n",
    )
    for text in banner_and_notes:
        print(text)

    # This is the standard name but feel free to change it if yours is different
    assignments_path = "cnumvtime.dat"
    main_cluster_frames = get_clusters(assignments_path)
    condensed = condense_numbering(main_cluster_frames)
    frame_count = len(main_cluster_frames)

    # Important output for the user
    print(f" > Total frames: {frame_count}")
    print(f" > Final selection: {condensed}")
# Run the indexer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()