Source code for src.processing.MASICmerger

from src.processing.MSGFplusMerger import MSGFplusMerger
from utility.utils import timeit

import os
import pandas as pd
import fnmatch

[docs]class MASICmerger(MSGFplusMerger): '''Run for each dataset ''' def __init__(self, folder): ''' :param folder: ''' self.parent_folder = folder self.MSGFjobs_MASIC_resultant=None self.file_pattern_types = {"masic": "{}SICstats.txt"} self.masic=[] @timeit def merge_msgfplus_msaic(self, MSGF_df ): ''' 1. Read in the MASIC job: "*_SICstats.txt" in masic_DF with added JobNum column 2. Inner-join: MSGFjobs_Merged and masic_DF.FragScanNumber over JobNum <--> JobNum Scan <-->FragScanNumber 3. create MSGFjobs_MASIC_resultant dataframe. ''' masic_folder = os.path.join(self.parent_folder, 'DMS_MASICjob') for cur_path, directories, files in os.walk(masic_folder): for file in files: if fnmatch.fnmatch(file, self.file_pattern_types["masic"].format('*')): self.masic.append(os.path.join(cur_path, file)) try: masic_DF= pd.read_csv(self.masic[0], sep='\t') except Exception as e: print("MASIC has multiple files!") masic_DF= masic_DF.rename(columns={'FragScanNumber': 'Scan'}) self.MSGFjobs_MASIC_resultant = pd.merge(MSGF_df, masic_DF, how='left', left_on=['Scan'], right_on=['Scan']) self.write_to_disk(self.MSGFjobs_MASIC_resultant , self.parent_folder, "MSGFjobs_MASIC_resultant.tsv" )
# print(masic_DF.shape, masic_DF.columns.values) # print('`' * 5) # print(self.MSGFjobs_MASIC_resultant.shape, self.MSGFjobs_MASIC_resultant.columns.values) # print('`' * 5)