from src.data_access.via_DMS.Input import Input
from src.data_access.via_DMS.QueryBuilder import QueryBuilder
from src.data_access.via_DMS.FileOperations import FileOperations
from src.processing.DatasetsMerger import DatasetsMerger
from src.analysis import internalAnalysis
import os
import fnmatch
class Workflow:
    """Automate the Meta-proteomics workflow.

    Orchestrates three stages:
      1. Download relevant datasets from DMS.
      2. Merge MSGF+ / MASIC analysis-job results.
      3. Run downstream analysis and generate reports.
    """

    def __init__(self, mode=None, InputType=None, path_to_data=None, project_name=None,
                 UserInput=None, CombineDatasets=None, SelectAnalysis=None):
        """
        Initiate values from the command line.

        :param mode: execution mode (user vs. developer; currently unused — see TODO
            in :meth:`start_workflow`).
        :param InputType: kind of user input, forwarded to ``Input.other_input()``.
        :param path_to_data: root folder where downloaded data is stored.
        :param project_name: project name used when building the DMS query.
        :param UserInput: raw input from the shell script.
        :param CombineDatasets: truthy to merge all datasets into a single result file.
        :param SelectAnalysis: which analysis to run ("internal", "ficus", or both).
        """
        self.Mode = mode
        self.InputType = InputType
        self.Storage = path_to_data
        self.Project = project_name
        self.UserInput = UserInput
        self.CombineDatasets = CombineDatasets
        self.SelectAnalysis = SelectAnalysis

    def run_Analysis(self, on_file, analysis_type):
        """
        Run desired analysis on a file.

        :meta public:
        :param on_file: path of the result file to analyze.
        :param analysis_type: "internal", "ficus", or anything else to run both.
        :return: None
        """
        # NOTE(review): the actual analysis calls are commented out upstream;
        # for now this only reports which analysis would run.
        if analysis_type == "internal":
            print("Run Internal analysis on {}".format(on_file))
            # internalAnalysis(on_file)
        elif analysis_type == "ficus":
            print("Run Ficus analysis on {}".format(on_file))
            # ficusAnalysis(on_file)
        else:  # "both"
            print("Run Internal & Ficus analysis on {}".format(on_file))
            # internalAnalysis(on_file)
            # ficusAnalysis(on_file)

    def start_downStreamAnalysis(self, result_path):
        """
        Decides to run analysis on combined results vs single dataset.

        :meta public:
        :param result_path: folder containing merged job results.
        :return: None
        """
        if self.CombineDatasets:
            # All datasets were merged into one resultant file:
            # generate the report on that single file.
            # Fix: use os.path.join so a missing trailing separator in
            # result_path does not corrupt the filename.
            self.run_Analysis(os.path.join(result_path, "resultants_df.tsv"),
                              self.SelectAnalysis)
        else:
            # One resultant file per dataset: walk the tree and
            # generate a report for each one.
            for path, subdirs, files in os.walk(result_path):
                for file in files:
                    if fnmatch.fnmatch(file, "MSGFjobs_MASIC_resultant.tsv"):
                        self.run_Analysis(os.path.join(path, file), self.SelectAnalysis)

    def start_merging(self, folder):
        """
        Start merging MSGF and MASIC jobs.

        :meta public:
        :param folder: parent folder holding the downloaded job data.
        :return: path to the folder containing merged results.
        """
        merge = DatasetsMerger(folder, self.CombineDatasets)
        result_path = merge.merge_all_jobs_in_UserInput()
        return result_path

    def download_data_from_DMS(self, user_obj):
        """
        Build & execute query to download data from DMS.

        :meta public:
        :param user_obj: input from shell script.
        :return: path to the parent folder the data was downloaded into.
        """
        myQuery = QueryBuilder(user_obj, self.Storage, self.Project)
        myQuery.execute()
        analysis_jobs = myQuery.analysis_jobs
        parent_data_folder = myQuery.parent_data_folder
        job_info = myQuery.job_info
        file_obj = FileOperations(analysis_jobs, parent_data_folder, job_info)
        file_obj.get_files()
        return parent_data_folder

    def start_workflow(self):
        """
        Runs the workflow in 3 stages:

        1. Download relevant datasets from specified source.
        2. Aggregation of analysis tools {MSGF+, MASIC} results: to extract
           useful data from datasets.
        3. Generation of experimental report.

        :meta public:
        :return: None
        """
        # TODO: Use User-mode: to suppress file creations & Developer-mode: to Generate files!
        ## prepare user's input
        user_obj = Input()
        if self.InputType is None:
            # user_obj.user_input()  # no more manual execution
            pass
        else:
            user_obj.other_input(self.InputType, self.UserInput)
        ## 1. download
        data_parent_folder = self.download_data_from_DMS(user_obj)
        print("Input Data located at @:{}".format(data_parent_folder))
        ## 2. merge
        result_path = self.start_merging(data_parent_folder)
        print("Merged jobs located at @:{}".format(result_path))
        ## 3. report
        self.start_downStreamAnalysis(result_path)
        print("Generated reports @:{}".format(result_path))
        print('`'*5)
        print("Finished running Meta-proteomics pipeline!")