Mercurial > repos > rliterman > csp2
diff CSP2/bin/compileSNPResults.py @ 39:93393808f415
"planemo upload"
author | rliterman |
---|---|
date | Thu, 12 Dec 2024 13:53:15 -0500 |
parents | 01431fa12065 |
children |
line wrap: on
line diff
--- a/CSP2/bin/compileSNPResults.py Wed Dec 11 12:04:20 2024 -0500
+++ b/CSP2/bin/compileSNPResults.py Thu Dec 12 13:53:15 2024 -0500
@@ -467,17 +467,20 @@
 # Output data
 
 # Mean assembly stats
-isolate_mean_df.reset_index().to_csv(mean_isolate_file,sep='\t',index=False)
+with open(mean_isolate_file, 'w') as f:
+    isolate_mean_df.reset_index().to_csv(f,sep='\t',index=False)
 
 # Isolate assembly stats
 isolate_assembly_stats = isolate_stats.loc[isolate_stats['Measure'].isin(['Contig_Count','Assembly_Bases','L50','L90','N50','N90'])].drop(['Min','Max','StdDev','Count'],axis=1).rename(columns = {'Mean':'Value'})
-isolate_assembly_stats.to_csv(isolate_assembly_stats_file,sep='\t',index=False)
+with open(isolate_assembly_stats_file,'w') as f:
+    isolate_assembly_stats.to_csv(f,sep='\t',index=False)
 
 # Isolate alignment stats
 isolate_align_stats = pd.concat([align_stats,isolate_cocalled_stats,isolate_snp_stats,isolate_stdev_stats]).reset_index(drop=True)
 for col in ['Min', 'Mean', 'Max', 'StdDev', 'Zscore']:
     isolate_align_stats[col] = isolate_align_stats[col].astype("float").round(3)
-isolate_align_stats.to_csv(align_stats_file,sep='\t',index=False)
+with open(align_stats_file,'w') as f:
+    isolate_align_stats.to_csv(f,sep='\t',index=False)
 
 # Reference Assembly Stats
 ref_align_summary_df = ref_summary_df.loc[(~ref_summary_df['Measure'].isin(['Contig_Count','Assembly_Bases','L50','L90','N50','N90'])) & (~pd.isna(ref_summary_df['Zscore']))]
@@ -492,7 +495,9 @@
 
 ref_isolate_align_stats = align_stats.loc[(align_stats['Isolate_Type'] == "Reference") & (align_stats['Measure'].isin(['Self_Aligned','Compare_Aligned']))].drop(['Isolate_Type'],axis=1).rename(columns = {'Isolate_ID':'Reference_ID'})[['Reference_ID','Measure','Mean','StdDev','Min','Max','Count','Zscore','QC']]
 ref_mean_summary_stats = pd.concat([ref_mean_summary_df,ref_isolate_align_stats])
-ref_mean_summary_stats.to_csv(ref_mean_summary_file,sep='\t',index=False)
+
+with open(ref_mean_summary_file,'w') as f:
+    ref_mean_summary_stats.to_csv(f,sep='\t',index=False)
 
 end_time = time.time()
 
@@ -505,10 +510,12 @@
 
 # Comparisons if multiple refs
 if len(reference_ids) > 1:
-    comparison_df.to_csv(snp_comparison_file,sep="\t",index = False)
+    with open(snp_comparison_file,"w") as f:
+        comparison_df.to_csv(f,sep="\t",index = False)
     log.write(f"\t- Saved SNP distance comparisons across references to {snp_comparison_file}\n")
 
 # Failures/warnings
 if warn_fail_df.shape[0] > 0:
-    warn_fail_df.to_csv(qc_file,sep="\t",index=False)
+    with open(qc_file,"w") as f:
+        warn_fail_df.to_csv(f,sep="\t",index=False)
     log.write(f"\t- Saved QC warnings/failures to {qc_file}\n")
\ No newline at end of file