Source code for ipfx.bin.get_fx_output

import ipfx.lims_queries as lq
import glob
import os
import pandas as pd
import argparse

NO_SPECIMEN = "No_specimen_in_LIMS"
NO_OUTPUT_FILE = "No_feature_extraction_output"


[docs]def get_fx_output_json(specimen_id): """ Find in LIMS the full path to the json output of the feature extraction module If more than one file exists, then chose the latest version Parameters ---------- specimen_id Returns ------- file_name: string """ query = """ select err.storage_directory, err.id from specimens sp join ephys_roi_results err on err.id = sp.ephys_roi_result_id where sp.id = %d """ % specimen_id res = lq.query(query) if res: err_dir = res[0]["storage_directory"] file_list = glob.glob(os.path.join(err_dir, '*EPHYS_FEATURE_EXTRACTION_*_output.json')) if file_list: latest_file = max(file_list, key=os.path.getctime) # get the most recent file return latest_file else: return NO_OUTPUT_FILE else: return NO_SPECIMEN
[docs]def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('--input_file', type=str, required=True) parser.add_argument('--output_file', type=str, required=True) args = parser.parse_args() return vars(args)
[docs]def main(): """ Usage: $python get_fx_output.py --input_file IN_FILE --output_file OUT_FILE IN_FILE: name of the input file including a single column with the header 'specimen_id' OUT_FILE: name of the output file that includes columns 'specimen_id' and 'fx_output_json' """ kwargs = parse_args() specimen_file = pd.read_csv(kwargs["input_file"], sep=" ") specimen_ids = specimen_file["specimen_id"].values fx_out = [ {"specimen_id": specimen_id, "fx_output_json": get_fx_output_json(specimen_id)} for specimen_id in specimen_ids] fx_out_df = pd.DataFrame(fx_out) fx_out_df.to_csv(kwargs["output_file"], sep=" ", na_rep="NA")
if __name__=="__main__": main()