Source code for pyqmmm.md.edia_chain_calculator

"""This script takes EDIA output and calculates the average chain EDIA scores."""

import matplotlib.pyplot as plt
import pandas as pd


# Colours of the two bar series (EDIAm, EDIA) used in every chart.
_EDIA_COLORS = ["#ff16ea", "#020cfa"]

# Output column label -> name of the source column in the EDIA CSV.
_SCORE_COLUMNS = {"EDIAm": "EDIAm", "EDIA": "Median EDIA"}

_DIVIDER = "-------------------"


def _chain_statistics(scores, chain_list):
    """Return per-chain mean and standard error of the EDIA score columns.

    Parameters
    ----------
    scores : pandas.DataFrame
        Rows with at least the columns "Chain", "EDIAm" and "Median EDIA".
    chain_list : list of str
        Chain identifiers to summarise; they become the index of the results.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(means, std_err)``, both indexed by chain with the columns
        "EDIAm" and "EDIA".
    """
    means = pd.DataFrame(index=chain_list, columns=list(_SCORE_COLUMNS), dtype=float)
    std_err = means.copy()
    for chain in chain_list:
        rows = scores.loc[scores["Chain"] == chain]
        for label, column in _SCORE_COLUMNS.items():
            means.loc[chain, label] = rows[column].mean()
            std_err.loc[chain, label] = rows[column].sem()
    return means, std_err


def _parse_residues(residues):
    """Turn the residue answer ("9,23,134" or "none") into a list of ints.

    A blank answer and any capitalisation of "none" give an empty list.

    Raises
    ------
    ValueError
        If an entry is not an integer.
    """
    text = residues.strip()
    if not text or text.lower() == "none":
        return []
    try:
        # Empty tokens (e.g. from a trailing comma) are ignored.
        return [int(token) for token in text.split(",") if token.strip()]
    except ValueError as err:
        raise ValueError(
            'Residues must be comma-separated integers or "none", '
            "got {!r}.".format(residues)
        ) from err


def _pad_missing_chains(selected, residue_list, chain_list):
    """Add a zero-score row for every (residue, chain) pair that has no row.

    A residue that is absent from a chain therefore counts as a score of 0
    in that chain's average instead of being left out of it.
    """
    padding = []
    for residue in residue_list:
        chains_present = set(selected.loc[selected["ID"] == residue, "Chain"])
        for chain in chain_list:
            if chain not in chains_present:
                padding.append(
                    {"ID": residue, "Chain": chain, "EDIAm": 0, "Median EDIA": 0}
                )
    if not padding:
        return selected
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([selected, pd.DataFrame(padding)], ignore_index=True)


def _plot_chain_bars(means, std_err, title, filename):
    """Draw a grouped bar chart with error bars and save it to ``filename``."""
    plt.rc("axes", linewidth=2.5)
    _, ax = plt.subplots()
    means.plot.bar(yerr=std_err, ax=ax, capsize=4, rot=0, color=_EDIA_COLORS)
    plt.title(title, fontsize=18)
    plt.ylabel("EDIA Score", fontsize=16)
    plt.xlabel("Chains", fontsize=16)
    plt.xticks(rotation=0)
    plt.tick_params(labelsize=14)
    plt.legend(fontsize=12, loc="lower right")
    plt.savefig(filename, bbox_inches="tight")
    print("Figure has been created.")
    print(_DIVIDER)


def _analyze_edia(csv_name=None, chains=None, residues=None, make_plots=True):
    """Run the whole-chain analysis and compute the key-residue statistics.

    Any of the first three arguments left as ``None`` is asked for with
    ``input()``; all prompts are issued before the CSV file is read.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_res_means, df_res_se)`` for the key residues, indexed by chain.
        Both frames have no rows when no residues were requested or none of
        the requested residues occur in the file.

    Raises
    ------
    ValueError
        If none of the requested chains occur in the file, or if the residue
        answer cannot be parsed.
    """
    if csv_name is None:
        csv_name = input("Name of your EDIA CSV file without extension?: ")
    if chains is None:
        chains = input('Chains to compare (e.g., "ABCD")? ')
    if residues is None:
        residues = input('Key residues of interest (e.g., "9,23,134" or "none"): ')

    df = pd.read_csv("{}.csv".format(csv_name))
    df_all = df[["Chain", "EDIAm", "Median EDIA"]]

    # Validate the requested chains. dict.fromkeys drops repeated letters so
    # a chain cannot end up twice in the result index.
    known_chains = set(df_all["Chain"].unique())
    chain_list = []
    for chain in dict.fromkeys(chains.upper()):
        if chain in known_chains:
            print("Chain {} found.".format(chain))
            chain_list.append(chain)
        else:
            print("Chain {} not found.".format(chain))
    print(_DIVIDER)
    print()
    if not chain_list:
        raise ValueError(
            "None of the requested chains were found in {}.csv.".format(csv_name)
        )

    # Whole-chain averages, ranked by EDIA score.
    df_means, df_se = _chain_statistics(df_all, chain_list)
    print("Your sorted results for all residues:")
    print(df_means.sort_values(by="EDIA", ascending=False))
    print(_DIVIDER)
    print()
    if make_plots:
        _plot_chain_bars(df_means, df_se, "Chain Comparison", "chain_comparison.pdf")
        print()

    # Key-residue averages.
    no_rows = pd.DataFrame(columns=list(_SCORE_COLUMNS), dtype=float)
    requested = _parse_residues(residues)
    if not requested:
        return no_rows, no_rows.copy()

    df_res = df[["ID", "Chain", "EDIAm", "Median EDIA"]]
    known_ids = set(df_res["ID"])
    residue_list = []
    for residue in requested:
        if residue in known_ids:
            print("Residue {} found.".format(residue))
            residue_list.append(residue)
        else:
            print("Residue {} not found.".format(residue))
    print(_DIVIDER)
    if not residue_list:
        return no_rows, no_rows.copy()

    df_sel_res = df_res[df_res["ID"].isin(residue_list)]
    df_sel_res = _pad_missing_chains(df_sel_res, residue_list, chain_list)
    return _chain_statistics(df_sel_res, chain_list)


def get_edia(csv_name=None, chains=None, residues=None, *, make_plots=True):
    """Average EDIA scores per chain and return the key-residue means.

    Reads ``<csv_name>.csv``, prints the per-chain mean EDIAm / EDIA scores
    ranked by EDIA, optionally saves the bar chart ``chain_comparison.pdf``,
    and then averages the scores of the selected residues for each chain.
    A selected residue that has no row for a chain contributes a score of 0
    to that chain.

    Parameters
    ----------
    csv_name : str, optional
        Path of the EDIA CSV file without the ".csv" extension.
    chains : str, optional
        Chain letters to compare, e.g. "ABCD" (case-insensitive).
    residues : str, optional
        Comma-separated residue IDs, e.g. "9,23,134", or "none".
    make_plots : bool, keyword-only
        Set to False to skip creating ``chain_comparison.pdf``.

    Any of the first three arguments left as ``None`` is prompted for
    interactively, so calling ``get_edia()`` with no arguments is fully
    interactive.

    Returns
    -------
    pandas.DataFrame
        Mean "EDIAm" and "EDIA" of the selected residues, indexed by chain.
        The frame has no rows when no residues were requested or found.

    Raises
    ------
    ValueError
        If none of the requested chains occur in the file, or if the residue
        answer is not a list of integers.
    """
    df_res_means, _ = _analyze_edia(csv_name, chains, residues, make_plots)
    return df_res_means


def edia_chain_calculator():
    """Run the interactive EDIA chain comparison from start to finish.

    Prompts for the CSV file, chains and key residues, reports and plots the
    whole-chain averages, then prints the key-residue averages ranked by EDIA
    and saves their bar chart as ``chain_res_comp.pdf``. The residue report
    is skipped when there are no residue statistics to show.
    """
    df_res_means, df_res_se = _analyze_edia()
    if df_res_means.empty:
        print("No key residues to compare; skipping the residue comparison.")
        return

    sorted_res_df = df_res_means.sort_values(by="EDIA", ascending=False)
    print("Your sorted results for specific residues:")
    print(sorted_res_df)
    print(_DIVIDER)
    print()

    _plot_chain_bars(
        df_res_means,
        df_res_se,
        "Chain Comparison for Select Residues",
        "chain_res_comp.pdf",
    )
# Start the interactive analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    edia_chain_calculator()