Mercurial > repos > rliterman > csp2
comparison CSP2/bin/saveSNPDiffs.py @ 39:93393808f415
"planemo upload"
author | rliterman |
---|---|
date | Thu, 12 Dec 2024 13:53:15 -0500 |
parents | 893a6993efe3 |
children |
comparison
equal
deleted
inserted
replaced
38:ee512a230a1e | 39:93393808f415 |
---|---|
56 with open(snpdiffs_file, 'r') as file: | 56 with open(snpdiffs_file, 'r') as file: |
57 top_line = file.readline().lstrip('#').strip().split('\t') | 57 top_line = file.readline().lstrip('#').strip().split('\t') |
58 header_rows.append(processHeader(top_line,snpdiffs_file,trim_name)) | 58 header_rows.append(processHeader(top_line,snpdiffs_file,trim_name)) |
59 | 59 |
60 output_data = pd.concat(header_rows, ignore_index=True) | 60 output_data = pd.concat(header_rows, ignore_index=True) |
61 output_data.to_csv(summary_file, sep='\t', index=False) | 61 with open(summary_file,"w") as f: |
| | 62 output_data.to_csv(f, sep='\t', index=False) |
62 | 63 |
63 # If ref_ids is empty, save isolate data | 64 # If ref_ids is empty, save isolate data |
64 ref_header = ['Reference_ID','Reference_Assembly','Reference_Contig_Count','Reference_Assembly_Bases','Reference_N50','Reference_N90','Reference_L50','Reference_L90','Reference_SHA256'] | 65 ref_header = ['Reference_ID','Reference_Assembly','Reference_Contig_Count','Reference_Assembly_Bases','Reference_N50','Reference_N90','Reference_L50','Reference_L90','Reference_SHA256'] |
65 query_header = ['Query_ID','Query_Assembly','Query_Contig_Count','Query_Assembly_Bases','Query_N50','Query_N90','Query_L50','Query_L90','Query_SHA256'] | 66 query_header = ['Query_ID','Query_Assembly','Query_Contig_Count','Query_Assembly_Bases','Query_N50','Query_N90','Query_L50','Query_L90','Query_SHA256'] |
66 isolate_header = ["Isolate_ID","Assembly_Path","Contig_Count","Assembly_Bases","N50","N90","L50","L90","SHA256"] | 67 isolate_header = ["Isolate_ID","Assembly_Path","Contig_Count","Assembly_Bases","N50","N90","L50","L90","SHA256"] |
77 combined_df['Isolate_Type'] = np.where(combined_df['Isolate_ID'].isin(ref_ids), 'Reference', 'Query') | 78 combined_df['Isolate_Type'] = np.where(combined_df['Isolate_ID'].isin(ref_ids), 'Reference', 'Query') |
78 combined_df = combined_df.drop_duplicates() | 79 combined_df = combined_df.drop_duplicates() |
79 cols = combined_df.columns.tolist() | 80 cols = combined_df.columns.tolist() |
80 cols = cols[:1] + cols[-1:] + cols[1:-1] | 81 cols = cols[:1] + cols[-1:] + cols[1:-1] |
81 combined_df = combined_df[cols] | 82 combined_df = combined_df[cols] |
82 combined_df.to_csv(isolate_data_file, sep='\t', index=False) | 83 with open(isolate_data_file,"w") as f: |
| | 84 combined_df.to_csv(f, sep='\t', index=False) |
83 | 85 |