44from pathlib import Path
55from multiprocessing import Pool
66from corems .molecular_id .search .database_interfaces import MSPInterface
7+ from corems .mass_spectra .input .corems_hdf5 import ReadCoreMSHDFMassSpectra
78
89from support_code .nmdc .lipidomics .lipidomics_workflow import (
9- instantiate_lcms_obj ,
10- set_params_on_lcms_obj ,
11- check_scan_translator ,
12- add_mass_features ,
13- molecular_formula_search ,
14- export_results ,
1510 run_lipid_sp_ms1 ,
11+ process_ms2
1612)
1713
18-
def run_lcms_metabolomics_workflow(
    file_dir,
    out_dir,
    params_toml,
    msp_file_path,
    scan_translator=None,
    verbose=True,
    cores=1,
):
    """Run the LCMS metabolomics workflow on every ``*.raw`` file in a directory.

    Parameters
    ----------
    file_dir : str or Path
        Directory that is searched (non-recursively) for ``*.raw`` files.
    out_dir : str or Path
        Directory for the outputs; created (with parents) if it is missing.
        Each input file gets the output path ``out_dir / <input stem>``.
    params_toml : str or Path
        Passed through unchanged to ``run_lipid_sp_ms1``.
    msp_file_path : str or Path
        Passed to ``prepare_metadata`` to build the MS2 search databases.
    scan_translator : optional
        Passed through unchanged to ``run_lipid_sp_ms1``.
    verbose : bool
        Passed through unchanged to ``run_lipid_sp_ms1``.
    cores : int
        Only serial execution is implemented. Files are processed one by one
        when ``cores == 1`` or when exactly one input file was found.

    Raises
    ------
    ValueError
        If parallel processing is requested (``cores > 1`` with anything other
        than exactly one input file), or if ``cores`` is smaller than 1 in
        that same situation.
    """
    # Accept plain strings as well as Path objects for both directories.
    file_dir = Path(file_dir)
    out_dir = Path(out_dir)

    # Make output dir and get list of files to process
    out_dir.mkdir(parents=True, exist_ok=True)
    files_list = list(file_dir.glob("*.raw"))
    out_paths_list = [out_dir / f.stem for f in files_list]

    # Fail fast: reject unsupported configurations before the MSP library is
    # parsed, so no work is wasted on a run that cannot proceed.
    if not (cores == 1 or len(files_list) == 1):
        if cores > 1:
            raise ValueError(
                "Parallel processing is not yet supported for LCMS metabolomics workflow."
            )
        # Previously this case fell through both branches and did nothing.
        raise ValueError(f"cores must be a positive integer, got {cores!r}")

    # Prepare search databases for ms2 search. The result is not consumed in
    # this version of the workflow; the call is kept to preserve behavior.
    prepare_metadata(msp_file_path)

    # Run signal processing, get associated ms1, add associated ms2, do ms1
    # molecular search, and export temp results.
    # Note that this is exactly the same as the lipidomics workflow.
    for file_in, file_out in zip(files_list, out_paths_list):
        print(f"Processing {file_in} ")
        run_lipid_sp_ms1(
            file_in=str(file_in),
            out_path=str(file_out),
            params_toml=params_toml,
            scan_translator=scan_translator,
            verbose=verbose,
            return_mzs=False,
        )
53-
54-
5514def prepare_metadata (msp_file_path ):
5615 print ("Parsing MSP file..." )
5716 my_msp = MSPInterface (file_path = msp_file_path )
@@ -63,7 +22,7 @@ def prepare_metadata(msp_file_path):
6322 msp_positive , metabolite_metadata_positive = (
6423 my_msp .get_metabolomics_spectra_library (
6524 polarity = "positive" ,
66- format = "df " ,
25+ format = "flashentropy " ,
6726 normalize = True ,
6827 fe_kwargs = {
6928 "normalize_intensity" : True ,
@@ -98,6 +57,68 @@ def prepare_metadata(msp_file_path):
9857
9958 return metadata
10059
def run_ms2_search(out_path, metadata, scan_translator=None):
    """Reload one sample's intermediate HDF5 results and run ``process_ms2`` on them.

    Parameters
    ----------
    out_path : str or Path
        Output path prefix of the sample. The intermediate file is read from
        ``<out_path>.corems/<stem of out_path>.hdf5``.
    metadata : dict
        Handed to ``process_ms2`` unchanged. Per the original notes: a dict
        with keys "mzs", "fe", and "molecular_metadata" holding precursor mzs
        (negative and positive), flash entropy search databases (negative and
        positive), and molecular metadata, respectively.
    scan_translator : optional
        Handed to ``process_ms2`` unchanged.

    Returns
    -------
    None
        NOTE(review): nothing is exported in this function itself; any export
        would have to happen inside ``process_ms2`` -- confirm.
    """
    sample_base = Path(out_path)

    # Location of the intermediate results written by the earlier processing step
    hdf5_location = f"{sample_base}.corems/{sample_base.stem}.hdf5"

    # Read in the intermediate results and hand the LCMS object to the ms2 step
    lcms_obj = ReadCoreMSHDFMassSpectra(hdf5_location).get_lcms_obj()
    process_ms2(lcms_obj, metadata, scan_translator=scan_translator)
80+
def run_lcms_metabolomics_workflow(
    file_dir,
    out_dir,
    params_toml,
    msp_file_path,
    scan_translator=None,
    verbose=True,
    cores=1,
):
    """Run the LCMS metabolomics workflow on every ``*.raw`` file in a directory.

    For each input file, ``run_lipid_sp_ms1`` is run first and
    ``run_ms2_search`` is then run on the same output path.

    Parameters
    ----------
    file_dir : str or Path
        Directory that is searched (non-recursively) for ``*.raw`` files.
    out_dir : str or Path
        Directory for the outputs; created (with parents) if it is missing.
        Each input file gets the output path ``out_dir / <input stem>``.
    params_toml : str or Path
        Passed through unchanged to ``run_lipid_sp_ms1``.
    msp_file_path : str or Path
        Passed to ``prepare_metadata`` to build the MS2 search databases.
    scan_translator : optional
        Passed through unchanged to ``run_lipid_sp_ms1`` and ``run_ms2_search``.
    verbose : bool
        Passed through unchanged to ``run_lipid_sp_ms1``.
    cores : int
        Only serial execution is implemented. Files are processed one by one
        when ``cores == 1`` or when exactly one input file was found.

    Raises
    ------
    ValueError
        If parallel processing is requested (``cores > 1`` with anything other
        than exactly one input file), or if ``cores`` is smaller than 1 in
        that same situation.
    """
    # Accept plain strings as well as Path objects for both directories.
    file_dir = Path(file_dir)
    out_dir = Path(out_dir)

    # Make output dir and get list of files to process
    out_dir.mkdir(parents=True, exist_ok=True)
    files_list = list(file_dir.glob("*.raw"))
    out_paths_list = [out_dir / f.stem for f in files_list]

    # Fail fast: reject unsupported configurations before the MSP library is
    # parsed, so no work is wasted on a run that cannot proceed.
    if not (cores == 1 or len(files_list) == 1):
        if cores > 1:
            raise ValueError(
                "Parallel processing is not yet supported for LCMS metabolomics workflow."
            )
        # Previously this case fell through both branches and did nothing.
        raise ValueError(f"cores must be a positive integer, got {cores!r}")

    # Prepare search databases for ms2 search
    my_msp_FE = prepare_metadata(msp_file_path)

    # Run signal processing, get associated ms1, add associated ms2, do ms1
    # molecular search, and export temp results.
    # Note that this is exactly the same as the lipidomics workflow.
    for file_in, file_out in zip(files_list, out_paths_list):
        print(f"Processing {file_in} ")
        run_lipid_sp_ms1(
            file_in=str(file_in),
            out_path=str(file_out),
            params_toml=params_toml,
            scan_translator=scan_translator,
            verbose=verbose,
            return_mzs=False,
        )
        # TODO KRH: No need to save hdf5 and re-open, can combine sp and ms2
        # search with lcms_obj in memory
        run_ms2_search(
            out_path=str(file_out),
            metadata=my_msp_FE,
            scan_translator=scan_translator,
        )
121+
101122
102123if __name__ == "__main__" :
103124 # Set input variables to run
0 commit comments